From 8b276b59ccf983517a0d40f8a3ac243a104d4c16 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 29 Nov 2021 19:11:44 +0000 Subject: include: trace: Add new scmi_xfer_response_wait event Having a new step to trace SCMI stack while it waits for synchronous responses is useful to analyze system performance when changing waiting mode between polling and interrupt completion. Link: https://lore.kernel.org/r/20211129191156.29322-5-cristian.marussi@arm.com Reviewed-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/trace/events/scmi.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/trace/events/scmi.h b/include/trace/events/scmi.h index f3a4b4d60714..cee4b2b64ae4 100644 --- a/include/trace/events/scmi.h +++ b/include/trace/events/scmi.h @@ -33,6 +33,34 @@ TRACE_EVENT(scmi_xfer_begin, __entry->seq, __entry->poll) ); +TRACE_EVENT(scmi_xfer_response_wait, + TP_PROTO(int transfer_id, u8 msg_id, u8 protocol_id, u16 seq, + u32 timeout, bool poll), + TP_ARGS(transfer_id, msg_id, protocol_id, seq, timeout, poll), + + TP_STRUCT__entry( + __field(int, transfer_id) + __field(u8, msg_id) + __field(u8, protocol_id) + __field(u16, seq) + __field(u32, timeout) + __field(bool, poll) + ), + + TP_fast_assign( + __entry->transfer_id = transfer_id; + __entry->msg_id = msg_id; + __entry->protocol_id = protocol_id; + __entry->seq = seq; + __entry->timeout = timeout; + __entry->poll = poll; + ), + + TP_printk("transfer_id=%d msg_id=%u protocol_id=%u seq=%u tmo_ms=%u poll=%u", + __entry->transfer_id, __entry->msg_id, __entry->protocol_id, + __entry->seq, __entry->timeout, __entry->poll) +); + TRACE_EVENT(scmi_xfer_end, TP_PROTO(int transfer_id, u8 msg_id, u8 protocol_id, u16 seq, int status), -- cgit v1.2.3 From 7e28d0b26759846485978ada860ef4a427e06c8f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 10 Dec 2021 10:48:02 +0530 Subject: drm/i915/adl-n: Enable ADL-N platform Adding PCI device ids and enabling ADL-N platform. ADL-N from i915 point of view is subplatform of ADL-P. BSpec: 68397 Changes since V2: - Added version log history Changes since V1: - replace IS_ALDERLAKE_N with IS_ADLP_N - Jani Nikula Signed-off-by: Tejas Upadhyay Reviewed-by: Anusha Srivatsa Acked-by: Thomas Gleixner Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211210051802.4063958-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- include/drm/i915_pciids.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index baf3d1d3d566..533890dc9da1 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -666,6 +666,12 @@ INTEL_VGA_DEVICE(0x46C2, info), \ INTEL_VGA_DEVICE(0x46C3, info) +/* ADL-N */ +#define INTEL_ADLN_IDS(info) \ + INTEL_VGA_DEVICE(0x46D0, info), \ + INTEL_VGA_DEVICE(0x46D1, info), \ + INTEL_VGA_DEVICE(0x46D2, info) + /* RPL-S */ #define INTEL_RPLS_IDS(info) \ INTEL_VGA_DEVICE(0xA780, info), \ -- cgit v1.2.3 From 69255e746890274e887ba36a403019380cde0b48 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 20 Dec 2021 19:56:41 +0000 Subject: firmware: arm_scmi: Add support for atomic transports An SCMI transport can be configured as .atomic_enabled in order to signal to the SCMI core that all its TX path is executed in atomic context and that, when requested, polling mode should be used while waiting for command responses. 
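As a rough illustration (not part of this patch), a transport opting in to atomic operation only has to set the new flag in its descriptor; the descriptor and ops names below are hypothetical, and the remaining fields are the usual scmi_desc parameters:

  static const struct scmi_desc scmi_foo_desc = {
          .ops = &scmi_foo_ops,
          .max_rx_timeout_ms = 30,
          .max_msg = 20,
          .max_msg_size = 128,
          /* whole TX path runs in atomic context; core may poll for replies */
          .atomic_enabled = true,
  };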
When a specific platform configuration had properly configured such a transport as .atomic_enabled, the SCMI core will also take care not to sleep in the corresponding RX path while waiting for a response if that specific command transaction was requested as atomic using polling mode. Asynchronous commands should not be used in an atomic context and so a warning is emitted if polling was requested for an asynchronous command. Add also a method to check, from the SCMI drivers, if the underlying SCMI transport is currently configured to support atomic transactions: this will be used by upper layers to determine if atomic requests can be supported at all on this SCMI instance. Link: https://lore.kernel.org/r/20211220195646.44498-7-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 80e781c51ddc..9f895cb81818 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -612,6 +612,13 @@ struct scmi_notify_ops { * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol + * @is_transport_atomic: method to check if the underlying transport for this + * instance handle is configured to support atomic + * transactions for commands. + * Some users of the SCMI stack in the upper layers could + * be interested to know if they can assume SCMI + * command transactions associated to this handle will + * never sleep and act accordingly. * @notify_ops: pointer to set of notifications related operations */ struct scmi_handle { @@ -622,6 +629,7 @@ struct scmi_handle { (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, struct scmi_protocol_handle **ph); void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); + bool (*is_transport_atomic)(const struct scmi_handle *handle); const struct scmi_notify_ops *notify_ops; }; -- cgit v1.2.3 From 9206a3af4fc0cebbefca2d79876d279bdd8d582b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 17 Dec 2021 13:55:58 +0200 Subject: clk: ti: Move dra7 clock devices out of the legacy section I accidentally added some dra7 clock defines to the legacy section that we want to stop using. Let's move the defines to the right location. Note that this is just a cosmetic fix. 
Cc: linux-clk@vger.kernel.org Cc: Stephen Boyd Cc: Tero Kristo Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- include/dt-bindings/clock/dra7.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h index 7d57063b8a65..29ff6b895848 100644 --- a/include/dt-bindings/clock/dra7.h +++ b/include/dt-bindings/clock/dra7.h @@ -84,17 +84,10 @@ #define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -/* iva clocks */ -#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) - /* dss clocks */ #define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -/* gpu clocks */ -#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - /* l3init clocks */ #define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) @@ -267,10 +260,17 @@ #define DRA7_L3INSTR_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3INSTR_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) +/* iva clocks */ +#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) +#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) + /* dss clocks */ #define DRA7_DSS_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_DSS_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) +/* gpu clocks */ +#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) + /* l3init clocks */ #define DRA7_L3INIT_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_L3INIT_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -- cgit v1.2.3 From 825ca9ed1c9f5516b30292bb1c7ab648c2a01b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 21 Dec 2021 21:37:53 +0200 Subject: drm: Always include the debugfs dentry in drm_crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the counterproductive CONFIG_DEBUG_FS ifdef and just include the debugfs dentry in drm_crtc always. This way we don't need annoying ifdefs in the actual code with DEBUGFS=n. Also we don't have these ifdefs around any of the other debugfs dentries either so can't see why drm_crtc should be special. This fixes the i915 DEBUGFS=n build because I assumed the dentry would always be there. Cc: Jani Nikula Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Fixes: e74c6aa955ca ("drm/i915/fbc: Register per-crtc debugfs files") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211221193754.12287-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula Acked-by: Daniel Vetter --- include/drm/drm_crtc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 13eeba2a750a..4d01b4d89775 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1135,14 +1135,12 @@ struct drm_crtc { */ spinlock_t commit_lock; -#ifdef CONFIG_DEBUG_FS /** * @debugfs_entry: * * Debugfs directory for this CRTC. */ struct dentry *debugfs_entry; -#endif /** * @crc: -- cgit v1.2.3 From c4381d0ee81930097e94e55d1c23f85798ffd093 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 12 Jan 2022 10:34:11 -0500 Subject: drm/amdgpu: Add interface to load SRIOV cap FW - Add interface to load SRIOV cap FW. If the FW does not exist, simply skip this FW loading routine. This FW will only be loaded under SRIOV. Other driver configuration will not be affected. 
By adding this interface, it will make us easier to prepare SRIOV Linux guest driver for different users. - Update sysfs interface to read cap FW version. - Refactor PSP FW loading routine under SRIOV to use a unified SWITCH statement instead of using IF statement - Remove redundant amdgpu_sriov_vf() check in FW loading routine Acked-by: Monk Liu Acked-by: Guchun Chen Signed-off-by: Bokun Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0b94ec7b73e7..be4f9111f478 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -728,6 +728,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_DMCUB 0x14 /* Subquery id: Query TOC firmware version */ #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f -- cgit v1.2.3 From dee872e124e8d5de22b68c58f6f6c3f5e8889160 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:45 +0530 Subject: bpf: Populate kfunc BTF ID sets in struct btf This patch prepares the kernel to support putting all kinds of kfunc BTF ID sets in the struct btf itself. The various kernel subsystems will make register_btf_kfunc_id_set call in the initcalls (for built-in code and modules). The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS, STRUCT_OPS, and 'types' are check (allowed or not), acquire, release, and ret_null (with PTR_TO_BTF_ID_OR_NULL return type). A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a set of certain hook and type for vmlinux sets, since they are allocated on demand, and otherwise set as NULL. Module sets can only be registered once per hook and type, hence they are directly assigned. A new btf_kfunc_id_set_contains function is exposed for use in verifier, this new method is faster than the existing list searching method, and is also automatic. It also lets other code not care whether the set is unallocated or not. Note that module code can only do single register_btf_kfunc_id_set call per hook. This is why sorting is only done for in-kernel vmlinux sets, because there might be multiple sets for the same hook and type that must be concatenated, hence sorting them is required to ensure bsearch in btf_id_set_contains continues to work correctly. Next commit will update the kernel users to make use of this infrastructure. Finally, add __maybe_unused annotation for BTF ID macros for the !CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during build time. The previous patch is also needed to provide synchronization against initialization for module BTF's kfunc_set_tab introduced here, as described below: The kfunc_set_tab pointer in struct btf is write-once (if we consider the registration phase (comprised of multiple register_btf_kfunc_id_set calls) as a single operation). In this sense, once it has been fully prepared, it isn't modified, only used for lookup (from the verifier context). For btf_vmlinux, it is initialized fully during the do_initcalls phase, which happens fairly early in the boot process, before any processes are present. This also eliminates the possibility of bpf_check being called at that point, thus relieving us of ensuring any synchronization between the registration and lookup function (btf_kfunc_id_set_contains). 
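To make the registration flow concrete, a minimal sketch of how a subsystem (built-in or module) would publish its set with this interface is shown below; the kfunc name is purely hypothetical, while the macros and structures come from the btf_ids.h/btf.h changes in this patch:

  BTF_SET_START(example_check_kfunc_ids)
  BTF_ID(func, bpf_example_kfunc)
  BTF_SET_END(example_check_kfunc_ids)

  static const struct btf_kfunc_id_set example_kfunc_set = {
          .owner     = THIS_MODULE,
          .check_set = &example_check_kfunc_ids,
  };

  static int __init example_init(void)
  {
          /* one call per hook (program type); acquire/release/ret_null
           * sets, if any, go in the same btf_kfunc_id_set
           */
          return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
                                           &example_kfunc_set);
  }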
However, the case for module BTF is a bit tricky. The BTF is parsed, prepared, and published from the MODULE_STATE_COMING notifier callback. After this, the module initcalls are invoked, where our registration function will be called to populate the kfunc_set_tab for module BTF. At this point, BTF may be available to userspace while its corresponding module is still intializing. A BTF fd can then be passed to verifier using bpf syscall (e.g. for kfunc call insn). Hence, there is a race window where verifier may concurrently try to lookup the kfunc_set_tab. To prevent this race, we must ensure the operations are serialized, or waiting for the __init functions to complete. In the earlier registration API, this race was alleviated as verifier bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added by the registration function (called usually at the end of module __init function after all module resources have been initialized). If the verifier made the check_kfunc_call before kfunc BTF ID was added to the list, it would fail verification (saying call isn't allowed). The access to list was protected using a mutex. Now, it would still fail verification, but for a different reason (returning ENXIO due to the failed btf_try_get_module call in add_kfunc_call), because if the __init call is in progress the module will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE transition, and the BTF_MODULE_LIVE flag for btf_module instance will not be set, so the btf_try_get_module call will fail. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/btf_ids.h | 13 +++++++------ 2 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 0c74348cbc9d..c451f8e2612a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,11 +12,33 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) +enum btf_kfunc_type { + BTF_KFUNC_TYPE_CHECK, + BTF_KFUNC_TYPE_ACQUIRE, + BTF_KFUNC_TYPE_RELEASE, + BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_MAX, +}; + struct btf; struct btf_member; struct btf_type; union bpf_attr; struct btf_show; +struct btf_id_set; + +struct btf_kfunc_id_set { + struct module *owner; + union { + struct { + struct btf_id_set *check_set; + struct btf_id_set *acquire_set; + struct btf_id_set *release_set; + struct btf_id_set *ret_null_set; + }; + struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; + }; +}; extern const struct file_operations btf_fops; @@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id); +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -318,6 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } +static inline bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, + u32 kfunc_btf_id) +{ + 
return false; +} +static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s) +{ + return 0; +} #endif struct kfunc_btf_id_set { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 919c0fde1c51..bc5d9cc34e4c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -11,6 +11,7 @@ struct btf_id_set { #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ +#include /* for __maybe_unused */ /* * Following macros help to define lists of BTF IDs placed @@ -146,14 +147,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From b202d84422223b7222cba5031d182f20b37e146e Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:46 +0530 Subject: bpf: Remove check_kfunc_call callback and old kfunc BTF ID API Completely remove the old code for check_kfunc_call to help it work with modules, and also the callback itself. The previous commit adds infrastructure to register all sets and put them in vmlinux or module BTF, and concatenates all related sets organized by the hook and the type. Once populated, these sets remain immutable for the lifetime of the struct btf. Also, since we don't need the 'owner' module anywhere when doing check_kfunc_call, drop the 'btf_modp' module parameter from find_kfunc_desc_btf. 
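For reference, the verifier-side lookup now reduces to a single query against the BTF the call was resolved from; roughly (a sketch, not the literal verifier code, where desc_btf is the resolved vmlinux or module BTF and prog_type the resolved program type):

  if (!btf_kfunc_id_set_contains(desc_btf, prog_type,
                                 BTF_KFUNC_TYPE_CHECK, func_id))
          return -EACCES;         /* calling this kernel function is not allowed */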
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 -------- include/linux/btf.h | 44 -------------------------------------------- 2 files changed, 52 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e947cd91152..6d7346c54d83 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -573,7 +573,6 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -1719,7 +1718,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1971,12 +1969,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, - struct module *owner) -{ - return false; -} - static inline void bpf_map_put(struct bpf_map *map) { } diff --git a/include/linux/btf.h b/include/linux/btf.h index c451f8e2612a..b12cfe3b12bb 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -359,48 +359,4 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, } #endif -struct kfunc_btf_id_set { - struct list_head list; - struct btf_id_set *set; - struct module *owner; -}; - -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner); - -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; -#else -static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ -} -static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ -} -static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, - u32 kfunc_id, struct module *owner) -{ - return false; -} - -static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; -static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; -#endif - -#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ - struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ - THIS_MODULE } - #endif -- cgit v1.2.3 From d583691c47dc0424ebe926000339a6d6cd590ff7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:47 +0530 Subject: bpf: Introduce mem, size argument pair support for kfunc BPF helpers can associate two adjacent arguments together to pass memory of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments. Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to implement similar support. The ARG_CONST_SIZE processing for helpers is refactored into a common check_mem_size_reg helper that is shared with kfunc as well. 
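As a sketch of what such a kfunc looks like on the kernel side (the argument-naming convention it relies on is described below; the function itself is hypothetical):

  /* 'data' and 'data__sz' form a mem/size pair: the verifier checks that
   * data__sz bytes are readable at data. As noted below, the kfunc must
   * still check the pointer for NULL.
   */
  noinline int bpf_kfunc_sum_bytes(void *data, u32 data__sz)
  {
          u32 i, sum = 0;

          if (!data)
                  return -EINVAL;
          for (i = 0; i < data__sz; i++)
                  sum += ((u8 *)data)[i];
          return sum;
  }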
kfunc ptr_to_mem support follows logic similar to global functions, where verification is done as if pointer is not null, even when it may be null. This leads to a simple to follow rule for writing kfunc: always check the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the PTR_TO_CTX case is also only safe when the helper expecting pointer to program ctx is not exposed to other programs where same struct is not ctx type. In that case, the type check will fall through to other cases and would permit passing other types of pointers, possibly NULL at runtime. Currently, we require the size argument to be suffixed with "__sz" in the parameter name. This information is then recorded in kernel BTF and verified during function argument checking. In the future we can use BTF tagging instead, and modify the kernel function definitions. This will be a purely kernel-side change. This allows us to have some form of backwards compatibility for structures that are passed in to the kernel function with their size, and allow variable length structures to be passed in if they are accompanied by a size parameter. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 143401d4c9d9..857fd687bdc2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ctx_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -- cgit v1.2.3 From 5c073f26f9dc78a6c8194b23eac7537c9692c7d7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:48 +0530 Subject: bpf: Add reference tracking support to kfunc This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc to be a reference, by reusing acquire_reference_state/release_reference support for existing in-kernel bpf helpers. We make use of the three kfunc types: - BTF_KFUNC_TYPE_ACQUIRE Return true if kfunc_btf_id is an acquire kfunc. This will acquire_reference_state for the returned PTR_TO_BTF_ID (this is the only allow return value). Note that acquire kfunc must always return a PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected. - BTF_KFUNC_TYPE_RELEASE Return true if kfunc_btf_id is a release kfunc. This will release the reference to the passed in PTR_TO_BTF_ID which has a reference state (from earlier acquire kfunc). The btf_check_func_arg_match returns the regno (of argument register, hence > 0) if the kfunc is a release kfunc, and a proper referenced PTR_TO_BTF_ID is being passed to it. This is similar to how helper call check uses bpf_call_arg_meta to store the ref_obj_id that is later used to release the reference. Similar to in-kernel helper, we only allow passing one referenced PTR_TO_BTF_ID as an argument. It can also be passed in to normal kfunc, but in case of release kfunc there must always be one PTR_TO_BTF_ID argument that is referenced. 
- BTF_KFUNC_TYPE_RET_NULL For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence force caller to mark the pointer not null (using check) before accessing it. Note that taking into account the case fixed by commit 93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type") we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more return types are supported by kfunc, which have a _OR_NULL variant, it might be better to move this id generation under a common reg_type_may_be_null check, similar to the case in the commit. Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be extended in the future to other BPF helpers as well. For now, we can rely on the btf_struct_ids_match check to ensure we get the pointer to the expected struct type. In the future, care needs to be taken to avoid ambiguity for reference PTR_TO_BTF_ID passed to release function, in case multiple candidates can release same BTF ID. e.g. there might be two release kfuncs (or kfunc and helper): foo(struct abc *p); bar(struct abc *p); ... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In this case we would need to track the acquire function corresponding to the release function to avoid type confusion, and store this information in the register state so that an incorrect program can be rejected. This is not a problem right now, hence it is left as an exercise for the future patch introducing such a case in the kernel. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 857fd687bdc2..ac4797155412 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -566,4 +566,9 @@ static inline u32 type_flag(u32 type) return type & ~BPF_BASE_TYPE_MASK; } +static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +{ + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; +} + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From b4c2b9593a1c4c3a718370e34af28e817fd5e5c6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:49 +0530 Subject: net/netfilter: Add unstable CT lookup helpers for XDP and TC-BPF This change adds conntrack lookup helpers using the unstable kfunc call interface for the XDP and TC-BPF hooks. The primary usecase is implementing a synproxy in XDP, see Maxim's patchset [0]. Export get_net_ns_by_id as nf_conntrack_bpf.c needs to call it. This object is only built when CONFIG_DEBUG_INFO_BTF_MODULES is enabled. 
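A BPF-program-side sketch of the intended usage follows. The kfunc names, signatures and the bpf_ct_opts layout are taken from the net/netfilter part of this series, which is not visible in this include/ excerpt, so treat them as assumptions; vmlinux.h and bpf_helpers.h are assumed to be included.

  struct bpf_ct_opts {
          __s32 netns_id;
          __s32 error;
          __u8 l4proto;
          __u8 dir;
          __u8 reserved[2];
  };

  extern struct nf_conn *
  bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
                    __u32 tuple__sz, struct bpf_ct_opts *opts, __u32 opts__sz) __ksym;
  extern void bpf_ct_release(struct nf_conn *ct) __ksym;

  SEC("xdp")
  int xdp_ct_lookup_example(struct xdp_md *ctx)
  {
          struct bpf_ct_opts opts = { .netns_id = -1, .l4proto = IPPROTO_TCP };
          struct bpf_sock_tuple tup = { };        /* fill from the parsed packet */
          struct nf_conn *ct;

          ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
          if (ct)
                  bpf_ct_release(ct);     /* the lookup takes a reference */
          return XDP_PASS;
  }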
[0]: https://lore.kernel.org/bpf/20211019144655.3483197-1-maximmi@nvidia.com Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/net/netfilter/nf_conntrack_bpf.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_bpf.h (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 000000000000..a473b56842c5 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include +#include + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ -- cgit v1.2.3 From e40fbbf0572c5e41dc87ad79001748ed399ce32d Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 19 Jan 2022 11:44:40 +0000 Subject: uapi/bpf: Add missing description and returns for helper documentation Both description and returns section will become mandatory for helpers and syscalls in a later commit to generate man pages. This commit also adds in the documentation that BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN for anyone searching for the syscall in the generated man pages. Signed-off-by: Usama Arif Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220119114442.1452088-1-usama.arif@bytedance.com --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9a..a9c96c21330a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1775,6 +1777,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1786,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2262,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2377,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2476,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. 
* Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3252,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. -- cgit v1.2.3 From f10d059661968b01ef61a8b516775f95a18ab8ae Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 16 Dec 2021 02:04:25 +0000 Subject: bpf: Make BPF_PROG_RUN_ARRAY return -err instead of allow boolean Right now BPF_PROG_RUN_ARRAY and related macros return 1 or 0 for whether the prog array allows or rejects whatever is being hooked. The caller of these macros then return -EPERM or continue processing based on thw macro's return value. Unforunately this is inflexible, since -EPERM is the only err that can be returned. This patch should be a no-op; it prepares for the next patch. The returning of the -EPERM is moved to inside the macros, so the outer functions are directly returning what the macros returned if they are non-zero. Signed-off-by: YiFei Zhu Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/788abcdca55886d1f43274c918eaa9f792a9f33b.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d7346c54d83..83da1764fcfe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1277,7 +1277,7 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog, u32 *ret_flags) @@ -1287,7 +1287,7 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; + int ret = 0; u32 func_ret; migrate_disable(); @@ -1298,7 +1298,8 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); - ret &= (func_ret & 1); + if (!(func_ret & 1)) + ret = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; } @@ -1308,7 +1309,7 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, return ret; } -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog) { @@ -1317,7 +1318,7 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; + int ret = 0; migrate_disable(); rcu_read_lock(); @@ -1326,7 +1327,8 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; - ret &= run_prog(prog, ctx); + if (!run_prog(prog, ctx)) + ret = -EPERM; item++; } bpf_reset_run_ctx(old_run_ctx); @@ -1394,7 +1396,7 @@ out: u32 _ret; \ _ret = 
BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ - if (_ret) \ + if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ -- cgit v1.2.3 From c4dcfdd406aa2167396ac215e351e5e4dfd7efe3 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 16 Dec 2021 02:04:26 +0000 Subject: bpf: Move getsockopt retval to struct bpf_cg_run_ctx The retval value is moved to struct bpf_cg_run_ctx for ease of access in different prog types with different context structs layouts. The helper implementation (to be added in a later patch in the series) can simply perform a container_of from current->bpf_ctx to retrieve bpf_cg_run_ctx. Unfortunately, there is no easy way to access the current task_struct via the verifier BPF bytecode rewrite, aside from possibly calling a helper, so a pointer to current task is added to struct bpf_sockopt_kern so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with an indirection. For backward compatibility, if a getsockopt program rejects a syscall by returning 0, an -EPERM will be generated, by having the BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to -EPERM. Unlike prior to this patch, this -EPERM will be visible to ctx->retval for any other hooks down the line in the prog array. Additionally, the restriction that getsockopt filters can only set the retval to 0 is removed, considering that certain getsockopt implementations may return optlen. Filters are now able to set the value arbitrarily. Signed-off-by: YiFei Zhu Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 +++++++++++--------- include/linux/filter.h | 5 ++++- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83da1764fcfe..7b0c11f414d0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1245,6 +1245,7 @@ struct bpf_run_ctx {}; struct bpf_cg_run_ctx { struct bpf_run_ctx run_ctx; const struct bpf_prog_array_item *prog_item; + int retval; }; struct bpf_trace_run_ctx { @@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); static __always_inline int BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog, - u32 *ret_flags) + int retval, u32 *ret_flags) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - int ret = 0; u32 func_ret; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); if (!(func_ret & 1)) - ret = -EPERM; + run_ctx.retval = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } static __always_inline int BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) + const void *ctx, bpf_prog_run_fn run_prog, + int retval) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; 
struct bpf_cg_run_ctx run_ctx; - int ret = 0; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; if (!run_prog(prog, ctx)) - ret = -EPERM; + run_ctx.retval = -EPERM; item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } static __always_inline u32 @@ -1394,7 +1396,7 @@ out: u32 _flags = 0; \ bool _cn; \ u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 71fa57b88bfc..d23e999dc032 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern { s32 level; s32 optname; s32 optlen; - s32 retval; + /* for retval in struct bpf_cg_run_ctx */ + struct task_struct *current_task; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); -- cgit v1.2.3 From b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 16 Dec 2021 02:04:27 +0000 Subject: bpf: Add cgroup helpers bpf_{get,set}_retval to get/set syscall return value The helpers continue to use int for retval because all the hooks are int-returning rather than long-returning. The return value of bpf_set_retval is int for future-proofing, in case in the future there may be errors trying to set the retval. After the previous patch, if a program rejects a syscall by returning 0, an -EPERM will be generated no matter if the retval is already set to -err. This patch change it being forced only if retval is not -err. This is because we want to support, for example, invoking bpf_set_retval(-EINVAL) and return 0, and have the syscall return value be -EINVAL not -EPERM. For BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY, the prior behavior is that, if the return value is NET_XMIT_DROP, the packet is silently dropped. We preserve this behavior for backward compatibility reasons, so even if an errno is set, the errno does not return to caller. However, setting a non-err to retval cannot propagate so this is not allowed and we return a -EFAULT in that case. 
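A minimal sketch of the new behaviour from the program side (assuming helper declarations generated from the updated UAPI header, plus errno.h and bpf_helpers.h for EINVAL and SEC()):

  SEC("cgroup/getsockopt")
  int getsockopt_custom_errno(struct bpf_sockopt *ctx)
  {
          /* Userspace now sees -EINVAL; before this change a rejecting
           * program could only produce -EPERM.
           */
          bpf_set_retval(-EINVAL);
          return 0;       /* reject */
  }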
Signed-off-by: YiFei Zhu Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/b4013fd5d16bed0b01977c1fafdeae12e1de61fb.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 10 ++++++---- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7b0c11f414d0..dce54eb0aae8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1299,7 +1299,7 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); - if (!(func_ret & 1)) + if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) run_ctx.retval = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; @@ -1329,7 +1329,7 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; - if (!run_prog(prog, ctx)) + if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) run_ctx.retval = -EPERM; item++; } @@ -1389,7 +1389,7 @@ out: * 0: NET_XMIT_SUCCESS skb should be transmitted * 1: NET_XMIT_DROP skb should be dropped and cn * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -EPERM skb should be dropped + * 3: -err skb should be dropped */ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ({ \ @@ -1398,10 +1398,12 @@ out: u32 _ret; \ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ + if (_ret && !IS_ERR_VALUE((long)_ret)) \ + _ret = -EFAULT; \ if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ - _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ + _ret = (_cn ? NET_XMIT_DROP : _ret); \ _ret; \ }) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9c96c21330a..fe2272defcd9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5033,6 +5033,22 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5221,6 +5237,8 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3368aa357f3ba133ae65fc26c04d24a1447a3903 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Tue, 11 Jan 2022 08:14:25 -0800 Subject: Bluetooth: msft: Handle MSFT Monitor Device Event Whenever the controller starts/stops monitoring a bt device, it sends MSFT Monitor Device event. Add handler to read this vendor event. Test performed: - Verified by logs that the MSFT Monitor Device event is received from the controller whenever it starts/stops monitoring a device. 
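The bookkeeping added below (struct monitored_device entries hung off hci_dev) might then be used along these lines when such an event arrives; this is an illustrative sketch, not code from this patch:

  static struct monitored_device *find_monitored_device(struct hci_dev *hdev,
                                                        u16 handle,
                                                        bdaddr_t *bdaddr,
                                                        u8 addr_type)
  {
          struct monitored_device *dev;

          list_for_each_entry(dev, &hdev->monitored_devices, list)
                  if (dev->handle == handle && dev->addr_type == addr_type &&
                      !bacmp(&dev->bdaddr, bdaddr))
                          return dev;

          return NULL;
  }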
Signed-off-by: Manish Mandlik Reviewed-by: Miao-chen Chou Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 586f69d084a2..639fb9f57ae7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -258,6 +258,15 @@ struct adv_info { #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F +struct monitored_device { + struct list_head list; + + bdaddr_t bdaddr; + __u8 addr_type; + __u16 handle; + bool notified; +}; + struct adv_pattern { struct list_head list; __u8 ad_type; @@ -591,6 +600,8 @@ struct hci_dev { struct delayed_work interleave_scan; + struct list_head monitored_devices; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif -- cgit v1.2.3 From 8d7f167752c3e4c45a39e76ffa6f7209413d3fa6 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Tue, 11 Jan 2022 08:14:26 -0800 Subject: Bluetooth: mgmt: Add MGMT Adv Monitor Device Found/Lost events This patch introduces two new MGMT events for notifying the bluetoothd whenever the controller starts/stops monitoring a device. Test performed: - Verified by logs that the MSFT Monitor Device is received from the controller and the bluetoothd is notified whenever the controller starts/stops monitoring a device. Signed-off-by: Manish Mandlik Reviewed-by: Miao-chen Chou Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +++ include/net/bluetooth/mgmt.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 639fb9f57ae7..21eadb113a31 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -601,6 +601,7 @@ struct hci_dev { struct delayed_work interleave_scan; struct list_head monitored_devices; + bool advmon_pend_notify; #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; @@ -1858,6 +1859,8 @@ void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, + bdaddr_t *bdaddr, u8 addr_type); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 107b25deae68..99266f7aebdc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -1104,3 +1104,19 @@ struct mgmt_ev_controller_resume { #define MGMT_WAKE_REASON_NON_BT_WAKE 0x0 #define MGMT_WAKE_REASON_UNEXPECTED 0x1 #define MGMT_WAKE_REASON_REMOTE_WAKE 0x2 + +#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f +struct mgmt_ev_adv_monitor_device_found { + __le16 monitor_handle; + struct mgmt_addr_info addr; + __s8 rssi; + __le32 flags; + __le16 eir_len; + __u8 eir[0]; +} __packed; + +#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030 +struct mgmt_ev_adv_monitor_device_lost { + __le16 monitor_handle; + struct mgmt_addr_info addr; +} __packed; -- cgit v1.2.3 From 5298d4bfe80f6ae6ae2777bcd1357b0022d98573 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Jan 2022 07:56:14 +0100 Subject: unicode: clean up the Kconfig symbol confusion Turn the CONFIG_UNICODE symbol into a tristate that 
generates some always built in code and remove the confusing CONFIG_UNICODE_UTF8_DATA symbol. Note that a lot of the IS_ENABLED() checks could be turned from cpp statements into normal ifs, but this change is intended to be fairly mechanic, so that should be cleaned up later. Fixes: 2b3d04787012 ("unicode: Add utf8-data module") Reported-by: Linus Torvalds Reviewed-by: Eric Biggers Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c8510da6cc6d..fdac22d16c2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1490,7 +1490,7 @@ struct super_block { #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif -- cgit v1.2.3 From 748cd5729ac7421091316e32dcdffb0578563880 Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Wed, 19 Jan 2022 09:40:04 +0800 Subject: bpf: support BPF_PROG_QUERY for progs attached to sockmap Right now there is no way to query whether BPF programs are attached to a sockmap or not. we can use the standard interface in libbpf to query, such as: bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...); the mapFd is the fd of sockmap. Signed-off-by: Di Zhu Acked-by: Yonghong Song Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220119014005.1209-1-zhudi2@huawei.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dce54eb0aae8..80e3387ea3af 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2069,6 +2069,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); + void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else @@ -2122,6 +2125,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } + +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ -- cgit v1.2.3 From d16697cb6261d4cc23422e6b1cb2759df8aa76d0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:44 +0100 Subject: net: skbuff: add size metadata to skb_shared_info for xdp Introduce xdp_frags_size field in skb_shared_info data structure to store xdp_buff/xdp_frame frame paged size (xdp_frags_size will be used in xdp frags support). In order to not increase skb_shared_info size we will use a hole due to skb_shared_info alignment. 
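A rough sketch of how a multi-buffer driver is expected to use the new field when it appends a paged fragment (the caller is assumed to have zeroed nr_frags/xdp_frags_size and marked the buffer as fragmented, using helpers added later in this series):

  static void xdp_add_frag(struct xdp_buff *xdp, struct page *page,
                           unsigned int off, unsigned int len)
  {
          struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
          skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];

          skb_frag_off_set(frag, off);
          skb_frag_size_set(frag, len);
          __skb_frag_set_page(frag, page);

          /* total paged length, reused as-is when the skb is built */
          sinfo->xdp_frags_size += len;
  }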
Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/8a849819a3e0a143d540f78a3a5add76e17e980d.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf11e1fbd69b..8131d0de7559 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,6 +557,7 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; + unsigned int xdp_frags_size; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ -- cgit v1.2.3 From 2e88d4ff03013937028f5397268b21e10cf68713 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:45 +0100 Subject: xdp: introduce flags field in xdp_buff/xdp_frame Introduce flags field in xdp_frame and xdp_buffer data structures to define additional buffer features. At the moment the only supported buffer feature is frags bit (XDP_FLAGS_HAS_FRAGS). frags bit is used to specify if this is a linear buffer (XDP_FLAGS_HAS_FRAGS not set) or a frags frame (XDP_FLAGS_HAS_FRAGS set). In the latter case the driver is expected to initialize the skb_shared_info structure at the end of the first buffer to link together subsequent buffers belonging to the same frame. Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/e389f14f3a162c0a5bc6a2e1aa8dd01a90be117d.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 8f0812e4996d..485e9495a690 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -66,6 +66,10 @@ struct xdp_txq_info { struct net_device *dev; }; +enum xdp_buff_flags { + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ +}; + struct xdp_buff { void *data; void *data_end; @@ -74,13 +78,30 @@ struct xdp_buff { struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { xdp->frame_sz = frame_sz; xdp->rxq = rxq; + xdp->flags = 0; } static __always_inline void @@ -122,8 +143,14 @@ struct xdp_frame { */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -180,6 +207,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->data_end = frame->data + frame->len; xdp->data_meta = frame->data - frame->metasize; 
xdp->frame_sz = frame->frame_sz; + xdp->flags = frame->flags; } static inline @@ -206,6 +234,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp, xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + xdp_frame->flags = xdp->flags; return 0; } -- cgit v1.2.3 From d65a1906b31246492449eafe9cace188cb59e26c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:48 +0100 Subject: net: xdp: add xdp_update_skb_shared_info utility routine Introduce xdp_update_skb_shared_info routine to update frags array metadata in skb_shared_info data structure converting to a skb from a xdp_buff or xdp_frame. According to the current skb_shared_info architecture in xdp_frame/xdp_buff and to the xdp frags support, there is no need to run skb_add_rx_frag() and reset frags array converting the buffer to a skb since the frag array will be in the same position for xdp_buff/xdp_frame and for the skb, we just need to update memory metadata. Introduce XDP_FLAGS_PF_MEMALLOC flag in xdp_buff_flags in order to mark the xdp_buff or xdp_frame as under memory-pressure if pages of the frags array are under memory pressure. Doing so we can avoid looping over all fragments in xdp_update_skb_shared_info routine. The driver is expected to set the flag constructing the xdp_buffer using xdp_buff_set_frag_pfmemalloc utility routine. Rely on xdp_update_skb_shared_info in __xdp_build_skb_from_frame routine converting the non-linear xdp_frame to a skb after performing a XDP_REDIRECT. Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/bfd23fb8a8d7438724f7819c567cdf99ffd6226f.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 485e9495a690..1f8641ec658e 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -67,7 +67,10 @@ struct xdp_txq_info { }; enum xdp_buff_flags { - XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under + * pressure + */ }; struct xdp_buff { @@ -96,6 +99,16 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; } +static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { @@ -151,6 +164,11 @@ static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); } +static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -186,6 +204,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +static inline void +xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + bool pfmemalloc) +{ + skb_shinfo(skb)->nr_frags = nr_frags; + + 
skb->len += size; + skb->data_len += size; + skb->truesize += truesize; + skb->pfmemalloc |= pfmemalloc; +} + /* Avoids inlining WARN macro in fast-path */ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) -- cgit v1.2.3 From 7c48cb0176c6d6d3b55029f7ff4ffa05faee6446 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:50 +0100 Subject: xdp: add frags support to xdp_return_{buff/frame} Take into account if the received xdp_buff/xdp_frame is non-linear recycling/returning the frame memory to the allocator or into xdp_frame_bulk. Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/a961069febc868508ce1bdf5e53a343eb4e57cb2.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 1f8641ec658e..8463dea8b4db 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -306,10 +306,24 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem); static inline void xdp_release_frame(struct xdp_frame *xdpf) { struct xdp_mem_info *mem = &xdpf->mem; + struct skb_shared_info *sinfo; + int i; /* Curr only page_pool needs this */ - if (mem->type == MEM_TYPE_PAGE_POOL) - __xdp_release_frame(xdpf->data, mem); + if (mem->type != MEM_TYPE_PAGE_POOL) + return; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_release_frame(page_address(page), mem); + } +out: + __xdp_release_frame(xdpf->data, mem); } int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, -- cgit v1.2.3 From c2f2cdbeffda7b153c19e0f3d73149c41026c0db Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:52 +0100 Subject: bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program Introduce BPF_F_XDP_HAS_FRAGS and the related field in bpf_prog_aux in order to notify the driver the loaded program support xdp frags. Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/db2e8075b7032a356003f407d1b0deb99adaa0ed.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80e3387ea3af..e93ed028a030 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -933,6 +933,7 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; + bool xdp_has_frags; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fe2272defcd9..945649c67e03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1113,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. 
+ */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * -- cgit v1.2.3 From 0165cc817075cf701e4289838f1d925ff1911b3e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:54 +0100 Subject: bpf: introduce bpf_xdp_get_buff_len helper Introduce bpf_xdp_get_buff_len helper in order to return the xdp buffer total size (linear and paged area) Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/aac9ac3504c84026cf66a3c71b7c5ae89bc991be.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 14 ++++++++++++++ include/uapi/linux/bpf.h | 7 +++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 8463dea8b4db..52b593321956 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -145,6 +145,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +{ + unsigned int len = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + len += sinfo->xdp_frags_size; +out: + return len; +} + struct xdp_frame { void *data; u16 len; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 945649c67e03..5a28772063f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5054,6 +5054,12 @@ union bpf_attr { * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5244,6 +5250,7 @@ union bpf_attr { FN(get_func_arg_cnt), \ FN(get_retval), \ FN(set_retval), \ + FN(xdp_get_buff_len), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From bf25146a5595269810b1f47d048f114c5ff9f544 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 21 Jan 2022 11:09:55 +0100 Subject: bpf: add frags support to the bpf_xdp_adjust_tail() API This change adds support for tail growing and shrinking for XDP frags. When called on a non-linear packet with a grow request, it will work on the last fragment of the packet. So the maximum grow size is the last fragments tailroom, i.e. no new buffer will be allocated. A XDP frags capable driver is expected to set frag_size in xdp_rxq_info data structure to notify the XDP core the fragment size. frag_size set to 0 is interpreted by the XDP core as tail growing is not allowed. Introduce __xdp_rxq_info_reg utility routine to initialize frag_size field. When shrinking, it will work from the last fragment, all the way down to the base buffer depending on the shrinking size. It's important to mention that once you shrink down the fragment(s) are freed, so you can not grow again to the original size. 
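As an illustration of the resulting user-visible behaviour, a minimal XDP program using the existing bpf_xdp_adjust_tail() helper on a frags-capable setup could look like the sketch below. The program is not part of this change, assumes a libbpf-style build, and receiving multi-buffer packets additionally requires the program to be loaded with BPF_F_XDP_HAS_FRAGS (introduced earlier in this series):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* Sketch only: shrink the packet by 4 bytes from the tail. On a frags
   * packet the shrink starts from the last fragment (freeing it if it
   * becomes empty); on a linear packet the behaviour is unchanged.
   */
  SEC("xdp")
  int xdp_trim_tail(struct xdp_md *ctx)
  {
          if (bpf_xdp_adjust_tail(ctx, -4))
                  return XDP_DROP;

          return XDP_PASS;
  }

  char LICENSE[] SEC("license") = "GPL";
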
Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jakub Kicinski Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Eelco Chaudron Link: https://lore.kernel.org/r/eabda3485dda4f2f158b477729337327e609461d.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 52b593321956..b7721c3e4d1f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -60,6 +60,7 @@ struct xdp_rxq_info { u32 reg_state; struct xdp_mem_info mem; unsigned int napi_id; + u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { @@ -304,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); @@ -340,8 +343,17 @@ out: __xdp_release_frame(xdpf->data, mem); } -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id); +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size); +static inline int +xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id) +{ + return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0); +} + void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); -- cgit v1.2.3 From f45d5b6ce2e835834c94b8b700787984f02cd662 Mon Sep 17 00:00:00 2001 From: Toke Hoiland-Jorgensen Date: Fri, 21 Jan 2022 11:10:02 +0100 Subject: bpf: generalise tail call map compatibility check The check for tail call map compatibility ensures that tail calls only happen between maps of the same type. To ensure backwards compatibility for XDP frags we need a similar type of check for cpumap and devmap programs, so move the state from bpf_array_aux into bpf_map, add xdp_has_frags to the check, and apply the same check to cpumap and devmap. Acked-by: John Fastabend Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Toke Hoiland-Jorgensen Link: https://lore.kernel.org/r/f19fd97c0328a39927f3ad03e1ca6b43fd53cdfd.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e93ed028a030..e8ec8d2f2fe3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,17 @@ struct bpf_map { struct work_struct work; struct mutex freeze_mutex; atomic64_t writecnt; + /* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. 
+ */ + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + } owner; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -994,16 +1005,6 @@ struct bpf_prog_aux { }; struct bpf_array_aux { - /* 'Ownership' of prog array is claimed by the first program that - * is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have - * the same prog type and JITed flag. - */ - struct { - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; @@ -1178,7 +1179,14 @@ struct bpf_event_entry { struct rcu_head rcu; }; -bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +static inline bool map_type_contains_progs(struct bpf_map *map) +{ + return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_CPUMAP; +} + +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); -- cgit v1.2.3 From 3f364222d032eea6b245780e845ad213dab28cdd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:10:03 +0100 Subject: net: xdp: introduce bpf_xdp_pointer utility routine Similar to skb_header_pointer, introduce bpf_xdp_pointer utility routine to return a pointer to a given position in the xdp_buff if the requested area (offset + len) is contained in a contiguous memory area otherwise it will be copied in a bounce buffer provided by the caller. Similar to the tc counterpart, introduce the two following xdp helpers: - bpf_xdp_load_bytes - bpf_xdp_store_bytes Reviewed-by: Eelco Chaudron Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jakub Kicinski Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/ab285c1efdd5b7a9d361348b1e7d3ef49f6382b3.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5a28772063f6..16a7574292a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5060,6 +5060,22 @@ union bpf_attr { * Get the total size of a given xdp buff (linear and paged area) * Return * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5251,6 +5267,8 @@ union bpf_attr { FN(get_retval), \ FN(set_retval), \ FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 96489c1c0b53131b0e1ec33e2060538379ad6152 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 16 Dec 2021 12:16:38 +0100 Subject: mtd: nand: ecc: Add infrastructure to support hardware engines Add the necessary helpers to register/unregister hardware ECC engines that will be called from ECC engine drivers. Also add helpers to get the right engine from the user perspective. Keep a reference of the in use ECC engine in order to prevent modules to be unloaded. Put the reference when the engine gets retired. A static list of hardware (only) ECC engines is setup to keep track of the registered engines. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211216111654.238086-13-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 32fc7edf65b3..4ddd20fe9c9e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -263,12 +263,36 @@ struct nand_ecc_engine_ops { struct nand_page_io_req *req); }; +/** + * enum nand_ecc_engine_integration - How the NAND ECC engine is integrated + * @NAND_ECC_ENGINE_INTEGRATION_INVALID: Invalid value + * @NAND_ECC_ENGINE_INTEGRATION_PIPELINED: Pipelined engine, performs on-the-fly + * correction, does not need to copy + * data around + * @NAND_ECC_ENGINE_INTEGRATION_EXTERNAL: External engine, needs to bring the + * data into its own area before use + */ +enum nand_ecc_engine_integration { + NAND_ECC_ENGINE_INTEGRATION_INVALID, + NAND_ECC_ENGINE_INTEGRATION_PIPELINED, + NAND_ECC_ENGINE_INTEGRATION_EXTERNAL, +}; + /** * struct nand_ecc_engine - ECC engine abstraction for NAND devices + * @dev: Host device + * @node: Private field for registration time * @ops: ECC engine operations + * @integration: How the engine is integrated with the host + * (only relevant on %NAND_ECC_ENGINE_TYPE_ON_HOST engines) + * @priv: Private data */ struct nand_ecc_engine { + struct device *dev; + struct list_head node; struct nand_ecc_engine_ops *ops; + enum nand_ecc_engine_integration integration; + void *priv; }; void of_get_nand_ecc_user_config(struct nand_device *nand); @@ -279,8 +303,12 @@ int nand_ecc_prepare_io_req(struct nand_device *nand, int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +int nand_ecc_register_on_host_hw_engine(struct nand_ecc_engine *engine); +int nand_ecc_unregister_on_host_hw_engine(struct nand_ecc_engine *engine); struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_on_die_hw_engine(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_get_on_host_hw_engine(struct nand_device *nand); +void nand_ecc_put_on_host_hw_engine(struct nand_device *nand); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From cda32a618debd3fad8e42757b198719ae180f8f4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 16 Dec 2021 12:16:39 +0100 Subject: mtd: nand: Add a new helper to retrieve the ECC context Introduce nand_to_ecc_ctx() which will allow to easily jump 
to the private pointer of an ECC context given a NAND device. This is very handy, from the prepare or finish ECC hook, to get the internal context out of the NAND device object. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211216111654.238086-14-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 4ddd20fe9c9e..b617efa0a881 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -990,6 +990,11 @@ int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); int nanddev_ecc_engine_init(struct nand_device *nand); void nanddev_ecc_engine_cleanup(struct nand_device *nand); +static inline void *nand_to_ecc_ctx(struct nand_device *nand) +{ + return nand->ecc.ctx.priv; +} + /* BBT related functions */ enum nand_bbt_block_status { NAND_BBT_BLOCK_STATUS_UNKNOWN, -- cgit v1.2.3 From 02d1d0e4dfc3fdc5aa05b78e7def00dc1e62257e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 7 Jan 2022 10:46:11 -0800 Subject: mtd: rawnand: brcmnand: Add platform data structure for BCMA Update the BCMA's chipcommon nand flash driver to detect which chip-select is used and pass that information via platform data to the brcmnand driver. Make sure that the brcmnand platform data structure is always at the beginning of the platform data of the "nflash" device created by BCMA to allow brcmnand to safely de-reference it. Signed-off-by: Florian Fainelli Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220107184614.2670254-7-f.fainelli@gmail.com --- include/linux/bcma/bcma_driver_chipcommon.h | 5 +++++ include/linux/platform_data/brcmnand.h | 12 ++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/linux/platform_data/brcmnand.h (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index d35b9206096d..e3314f746bfa 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -3,6 +3,7 @@ #define LINUX_BCMA_DRIVER_CC_H_ #include +#include #include /** ChipCommon core registers. **/ @@ -599,6 +600,10 @@ struct bcma_sflash { #ifdef CONFIG_BCMA_NFLASH struct bcma_nflash { + /* Must be the fist member for the brcmnand driver to + * de-reference that structure. + */ + struct brcmnand_platform_data brcmnand_info; bool present; bool boot; /* This is the flash the SoC boots from */ }; diff --git a/include/linux/platform_data/brcmnand.h b/include/linux/platform_data/brcmnand.h new file mode 100644 index 000000000000..8b8777985dce --- /dev/null +++ b/include/linux/platform_data/brcmnand.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef BRCMNAND_PLAT_DATA_H +#define BRCMNAND_PLAT_DATA_H + +struct brcmnand_platform_data { + int chip_select; + const char * const *part_probe_types; + unsigned int ecc_stepsize; + unsigned int ecc_strength; +}; + +#endif /* BRCMNAND_PLAT_DATA_H */ -- cgit v1.2.3 From 1e73d7f689c7a8fa13f78fe8d6be908fdceef17a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 15 Dec 2021 16:13:35 +0100 Subject: iio: core: Fix the kernel doc regarding the currentmode iio_dev entry This is an internal variable, which should be accessed in a very sporadic way and in no case changed by any device driver. 
Signed-off-by: Miquel Raynal Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20211215151344.163036-2-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 07025d6b3de1..faf00f2c0be6 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -489,7 +489,7 @@ struct iio_buffer_setup_ops { /** * struct iio_dev - industrial I/O device * @modes: [DRIVER] operating modes supported by device - * @currentmode: [DRIVER] current operating mode + * @currentmode: [INTERN] current operating mode * @dev: [DRIVER] device structure, should be assigned a parent * and owner * @buffer: [DRIVER] any buffer present -- cgit v1.2.3 From da5936770517aef8b28888f1123fa654c78cc2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Sat, 22 Jan 2022 14:09:04 +0100 Subject: adis: simplify 'adis_update_bits' macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need to use '__builtin_choose_expr' to choose the right call to 'adis_update_bits_base()'. We can change the 'BUILD_BUG_ON()' condition so that it makes sure only the supported sizes are passed in. With that, we can just use 'sizeof(val)' as the size argument of 'adis_update_bits_base()'. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20220122130905.99-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 7c02f5292eea..11754f97d8bb 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -381,10 +381,8 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg, * @val can lead to undesired behavior if the register to update is 16bit. */ #define adis_update_bits(adis, reg, mask, val) ({ \ - BUILD_BUG_ON(sizeof(val) == 1 || sizeof(val) == 8); \ - __builtin_choose_expr(sizeof(val) == 4, \ - adis_update_bits_base(adis, reg, mask, val, 4), \ - adis_update_bits_base(adis, reg, mask, val, 2)); \ + BUILD_BUG_ON(sizeof(val) != 2 && sizeof(val) != 4); \ + adis_update_bits_base(adis, reg, mask, val, sizeof(val)); \ }) /** @@ -399,10 +397,8 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg, * @val can lead to undesired behavior if the register to update is 16bit. */ #define __adis_update_bits(adis, reg, mask, val) ({ \ - BUILD_BUG_ON(sizeof(val) == 1 || sizeof(val) == 8); \ - __builtin_choose_expr(sizeof(val) == 4, \ - __adis_update_bits_base(adis, reg, mask, val, 4), \ - __adis_update_bits_base(adis, reg, mask, val, 2)); \ + BUILD_BUG_ON(sizeof(val) != 2 && sizeof(val) != 4); \ + __adis_update_bits_base(adis, reg, mask, val, sizeof(val)); \ }) int adis_enable_irq(struct adis *adis, bool enable); -- cgit v1.2.3 From c39010ea6ba13bdf0003bd353e1d4c663aaac0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Sat, 22 Jan 2022 14:09:05 +0100 Subject: iio: adis: stylistic changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minor stylistic changes to address checkptach complains when called with '--strict'. 
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20220122130905.99-3-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 48 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 11754f97d8bb..515ca09764fe 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -32,6 +32,7 @@ struct adis_timeout { u16 sw_reset_ms; u16 self_test_ms; }; + /** * struct adis_data - ADIS chip variant specific data * @read_delay: SPI delay for read operations in us @@ -45,7 +46,7 @@ struct adis_timeout { * @self_test_mask: Bitmask of supported self-test operations * @self_test_reg: Register address to request self test command * @self_test_no_autoclear: True if device's self-test needs clear of ctrl reg - * @status_error_msgs: Array of error messgaes + * @status_error_msgs: Array of error messages * @status_error_mask: Bitmask of errors supported by the device * @timeouts: Chip specific delays * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable @@ -130,12 +131,12 @@ struct adis { unsigned long irq_flag; void *buffer; - uint8_t tx[10] ____cacheline_aligned; - uint8_t rx[4]; + u8 tx[10] ____cacheline_aligned; + u8 rx[4]; }; int adis_init(struct adis *adis, struct iio_dev *indio_dev, - struct spi_device *spi, const struct adis_data *data); + struct spi_device *spi, const struct adis_data *data); int __adis_reset(struct adis *adis); /** @@ -156,9 +157,9 @@ static inline int adis_reset(struct adis *adis) } int __adis_write_reg(struct adis *adis, unsigned int reg, - unsigned int val, unsigned int size); + unsigned int val, unsigned int size); int __adis_read_reg(struct adis *adis, unsigned int reg, - unsigned int *val, unsigned int size); + unsigned int *val, unsigned int size); /** * __adis_write_reg_8() - Write single byte to a register (unlocked) @@ -167,7 +168,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, * @value: The value to write */ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, - uint8_t val) + u8 val) { return __adis_write_reg(adis, reg, val, 1); } @@ -179,7 +180,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, * @value: Value to be written */ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, - uint16_t val) + u16 val) { return __adis_write_reg(adis, reg, val, 2); } @@ -191,7 +192,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, * @value: Value to be written */ static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, - uint32_t val) + u32 val) { return __adis_write_reg(adis, reg, val, 4); } @@ -203,7 +204,7 @@ static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, * @val: The value read back from the device */ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, - uint16_t *val) + u16 *val) { unsigned int tmp; int ret; @@ -222,7 +223,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, * @val: The value read back from the device */ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, - uint32_t *val) + u32 *val) { unsigned int tmp; int ret; @@ -242,7 +243,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, * @size: The size of the @value (in bytes) */ static inline int adis_write_reg(struct adis *adis, unsigned int reg, - 
unsigned int val, unsigned int size) + unsigned int val, unsigned int size) { int ret; @@ -261,7 +262,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, * @size: The size of the @val buffer */ static int adis_read_reg(struct adis *adis, unsigned int reg, - unsigned int *val, unsigned int size) + unsigned int *val, unsigned int size) { int ret; @@ -279,7 +280,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg, * @value: The value to write */ static inline int adis_write_reg_8(struct adis *adis, unsigned int reg, - uint8_t val) + u8 val) { return adis_write_reg(adis, reg, val, 1); } @@ -291,7 +292,7 @@ static inline int adis_write_reg_8(struct adis *adis, unsigned int reg, * @value: Value to be written */ static inline int adis_write_reg_16(struct adis *adis, unsigned int reg, - uint16_t val) + u16 val) { return adis_write_reg(adis, reg, val, 2); } @@ -303,7 +304,7 @@ static inline int adis_write_reg_16(struct adis *adis, unsigned int reg, * @value: Value to be written */ static inline int adis_write_reg_32(struct adis *adis, unsigned int reg, - uint32_t val) + u32 val) { return adis_write_reg(adis, reg, val, 4); } @@ -315,7 +316,7 @@ static inline int adis_write_reg_32(struct adis *adis, unsigned int reg, * @val: The value read back from the device */ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg, - uint16_t *val) + u16 *val) { unsigned int tmp; int ret; @@ -334,7 +335,7 @@ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg, * @val: The value read back from the device */ static inline int adis_read_reg_32(struct adis *adis, unsigned int reg, - uint32_t *val) + u32 *val) { unsigned int tmp; int ret; @@ -439,8 +440,8 @@ static inline void adis_dev_unlock(struct adis *adis) } int adis_single_conversion(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, unsigned int error_mask, - int *val); + const struct iio_chan_spec *chan, + unsigned int error_mask, int *val); #define ADIS_VOLTAGE_CHAN(addr, si, chan, name, info_all, bits) { \ .type = IIO_VOLTAGE, \ @@ -489,7 +490,7 @@ int adis_single_conversion(struct iio_dev *indio_dev, .modified = 1, \ .channel2 = IIO_MOD_ ## mod, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ - info_sep, \ + (info_sep), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ .info_mask_shared_by_all = info_all, \ .address = (addr), \ @@ -523,7 +524,7 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); int adis_update_scan_mode(struct iio_dev *indio_dev, - const unsigned long *scan_mask); + const unsigned long *scan_mask); #else /* CONFIG_IIO_BUFFER */ @@ -547,7 +548,8 @@ static inline int devm_adis_probe_trigger(struct adis *adis, #ifdef CONFIG_DEBUG_FS int adis_debugfs_reg_access(struct iio_dev *indio_dev, - unsigned int reg, unsigned int writeval, unsigned int *readval); + unsigned int reg, unsigned int writeval, + unsigned int *readval); #else -- cgit v1.2.3 From 7a3b3dc3bb2eeac4adf8b01b1b6b519bcbc48cfc Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 7 Jan 2022 16:54:54 +0100 Subject: media: hevc: Remove RPS named flags Marking a picture as long-term reference is valid for DPB but not for RPS. Change flag name to match with the description in HEVC spec chapter "8.3.2 Decoding process for reference picture set". 
PocStCurrBefore, PocStCurrAfter, PocLtCurr lists could be built by the kernel from the DPB entries struct v4l2_hevc_dpb_entry, using the information in the rps field. This way RPS flags becomes useless and are removed. This patch breaks the staging HEVC API because it introduces a new flag, changes a field name in v4l2_hevc_dpb_entry structure and removes V4L2_HEVC_DPB_ENTRY_RPS_* flags. [hverkuil: fixed some typos] Signed-off-by: Benjamin Gaignard Reviewed-by: Jernej Skrabec Reviewed-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/hevc-ctrls.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h index ef63bc205756..01ccda48d8c5 100644 --- a/include/media/hevc-ctrls.h +++ b/include/media/hevc-ctrls.h @@ -127,15 +127,13 @@ struct v4l2_ctrl_hevc_pps { __u64 flags; }; -#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 -#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 -#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 +#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 #define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 struct v4l2_hevc_dpb_entry { __u64 timestamp; - __u8 rps; + __u8 flags; __u8 field_pic; __u16 pic_order_cnt[2]; __u8 padding[2]; -- cgit v1.2.3 From d49a14a946db0a8e0713aa43034879f967ab75e2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Jan 2022 12:29:01 +0100 Subject: media: lirc: simplify gap calculation When a driver reports a timeout, no more IR activity will be reported until the next pulse. A space is inserted between the timeout and the next pulse, based on ktime. The timeout reports already a duration, so this duration should not be added to the gap. Otherwise there is no change to the functionality. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-core.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index ab9d3b7cd799..33b3f7fcf92e 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -130,9 +130,7 @@ struct lirc_fh { * @tx_resolution: resolution (in us) of output sampler * @lirc_dev: lirc device * @lirc_cdev: lirc char cdev - * @gap_start: time when gap starts - * @gap_duration: duration of initial gap - * @gap: true if we're in a gap + * @gap_start: start time for gap after timeout if non-zero * @lirc_fh_lock: protects lirc_fh list * @lirc_fh: list of open files * @registered: set to true by rc_register_device(), false by @@ -201,8 +199,6 @@ struct rc_dev { struct device lirc_dev; struct cdev lirc_cdev; ktime_t gap_start; - u64 gap_duration; - bool gap; spinlock_t lirc_fh_lock; struct list_head lirc_fh; #endif -- cgit v1.2.3 From 1dafede34dda19fc2878724145dc16c0b51dc174 Mon Sep 17 00:00:00 2001 From: Keyon Jie Date: Thu, 20 Jan 2022 17:15:28 -0600 Subject: ASoC: SOF: add _D3_PERSISTENT flag to fw_ready message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a bit definition to the fw_ready message, to denote if the FW supports the IMR (Isolated Memory Region) restoring feature. If the bit is set, the driver can skip downloading the firmware again during system resume or runtime resume. Bump the ABI version to 3.19 to make it aligned with FW side. 
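As a sketch of how the host driver could consume the new capability bit (the helper name below is an illustrative assumption, not something added by this patch):

  #include <sound/sof/info.h>

  /* Sketch only: test the D3_PERSISTENT bit in the flags word reported
   * by the firmware's fw_ready message; if set, the IMR contents are
   * preserved and the firmware image need not be re-downloaded on
   * system or runtime resume.
   */
  static bool sof_fw_supports_imr_restore(u32 fw_ready_flags)
  {
          return !!(fw_ready_flags & SOF_IPC_INFO_D3_PERSISTENT);
  }
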
Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Signed-off-by: Keyon Jie Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220120231532.196926-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/info.h | 1 + include/uapi/sound/sof/abi.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h index 0b7101aef596..65e86e4e9fd8 100644 --- a/include/sound/sof/info.h +++ b/include/sound/sof/info.h @@ -25,6 +25,7 @@ #define SOF_IPC_INFO_LOCKS BIT(1) #define SOF_IPC_INFO_LOCKSV BIT(2) #define SOF_IPC_INFO_GDB BIT(3) +#define SOF_IPC_INFO_D3_PERSISTENT BIT(4) /* extended data types that can be appended onto end of sof_ipc_fw_ready */ enum sof_ipc_ext_data { diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h index fe2cfae94b45..f4232d289a22 100644 --- a/include/uapi/sound/sof/abi.h +++ b/include/uapi/sound/sof/abi.h @@ -26,7 +26,7 @@ /* SOF ABI version major, minor and patch numbers */ #define SOF_ABI_MAJOR 3 -#define SOF_ABI_MINOR 18 +#define SOF_ABI_MINOR 19 #define SOF_ABI_PATCH 0 /* SOF ABI version number. Format within 32bit word is MMmmmppp */ -- cgit v1.2.3 From 85f856f790b5fd427cb31b3f62755713174da0aa Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro Date: Mon, 17 Jan 2022 10:21:05 -0300 Subject: ASoC: Rename tlv320aic31xx-micbias.h as tlv320aic31xx.h Let's use a more generic name, so other definitions for tlv320aic31xx can be included. Signed-off-by: Ariel D'Alessandro Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220117132109.283365-2-ariel.dalessandro@collabora.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/tlv320aic31xx-micbias.h | 9 --------- include/dt-bindings/sound/tlv320aic31xx.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) delete mode 100644 include/dt-bindings/sound/tlv320aic31xx-micbias.h create mode 100644 include/dt-bindings/sound/tlv320aic31xx.h (limited to 'include') diff --git a/include/dt-bindings/sound/tlv320aic31xx-micbias.h b/include/dt-bindings/sound/tlv320aic31xx-micbias.h deleted file mode 100644 index c6895a18a455..000000000000 --- a/include/dt-bindings/sound/tlv320aic31xx-micbias.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DT_TLV320AIC31XX_MICBIAS_H -#define __DT_TLV320AIC31XX_MICBIAS_H - -#define MICBIAS_2_0V 1 -#define MICBIAS_2_5V 2 -#define MICBIAS_AVDDV 3 - -#endif /* __DT_TLV320AIC31XX_MICBIAS_H */ diff --git a/include/dt-bindings/sound/tlv320aic31xx.h b/include/dt-bindings/sound/tlv320aic31xx.h new file mode 100644 index 000000000000..3a845fbba992 --- /dev/null +++ b/include/dt-bindings/sound/tlv320aic31xx.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DT_TLV320AIC31XX_H +#define __DT_TLV320AIC31XX_H + +#define MICBIAS_2_0V 1 +#define MICBIAS_2_5V 2 +#define MICBIAS_AVDDV 3 + +#endif /* __DT_TLV320AIC31XX_H */ -- cgit v1.2.3 From 6045ffd366283236f0de79c8a0e98ae766e9a8f9 Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro Date: Mon, 17 Jan 2022 10:21:06 -0300 Subject: ASoC: tlv320aic31xx: Define PLL clock inputs Add constants for the different PLL clock inputs in tlv320aic31xx. 
Signed-off-by: Ariel D'Alessandro Link: https://lore.kernel.org/r/20220117132109.283365-3-ariel.dalessandro@collabora.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/tlv320aic31xx.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/sound/tlv320aic31xx.h b/include/dt-bindings/sound/tlv320aic31xx.h index 3a845fbba992..4a80238ab250 100644 --- a/include/dt-bindings/sound/tlv320aic31xx.h +++ b/include/dt-bindings/sound/tlv320aic31xx.h @@ -6,4 +6,9 @@ #define MICBIAS_2_5V 2 #define MICBIAS_AVDDV 3 +#define PLL_CLKIN_MCLK 0x00 +#define PLL_CLKIN_BCLK 0x01 +#define PLL_CLKIN_GPIO1 0x02 +#define PLL_CLKIN_DIN 0x03 + #endif /* __DT_TLV320AIC31XX_H */ -- cgit v1.2.3 From f1ba938e4f98941dc2b77795062e49444ec1fee1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:13 +0100 Subject: spi: s3c64xx: Delete unused boardfile helpers The helpers to use SPI host 1 and 2 are unused in the kernel and taking up space and maintenance hours. New systems should use device tree and not this, so delete the code. Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220118230915.157797-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/platform_data/spi-s3c64xx.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 773daf7915a3..19d690f34670 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -52,17 +52,9 @@ struct s3c64xx_spi_info { */ extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, int num_cs); -extern void s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); -extern void s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); /* defined by architecture to configure gpio */ extern int s3c64xx_spi0_cfg_gpio(void); -extern int s3c64xx_spi1_cfg_gpio(void); -extern int s3c64xx_spi2_cfg_gpio(void); extern struct s3c64xx_spi_info s3c64xx_spi0_pdata; -extern struct s3c64xx_spi_info s3c64xx_spi1_pdata; -extern struct s3c64xx_spi_info s3c64xx_spi2_pdata; #endif /*__SPI_S3C64XX_H */ -- cgit v1.2.3 From 3b5529ae7f3578da633e8ae2ec0715a55a248f9f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:14 +0100 Subject: spi: s3c64xx: Drop custom gpio setup argument The SPI0 platform population function was taking a custom gpio setup callback but the only user pass NULL as argument so drop this argument. Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20220118230915.157797-2-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/platform_data/spi-s3c64xx.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 19d690f34670..10890a4b55b9 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -43,15 +43,13 @@ struct s3c64xx_spi_info { /** * s3c64xx_spi_set_platdata - SPI Controller configure callback by the board * initialization code. 
- * @cfg_gpio: Pointer to gpio setup function. * @src_clk_nr: Clock the SPI controller is to use to generate SPI clocks. * @num_cs: Number of elements in the 'cs' array. * * Call this from machine init code for each SPI Controller that * has some chips attached to it. */ -extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); +extern void s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs); /* defined by architecture to configure gpio */ extern int s3c64xx_spi0_cfg_gpio(void); -- cgit v1.2.3 From a45cf3cc72dd9cfde9db8af32cdf9c431f53f9bc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:15 +0100 Subject: spi: s3c64xx: Convert to use GPIO descriptors Convert the S3C64xx SPI host to use GPIO descriptors. Provide GPIO descriptor tables for the one user with CS 0 and 1. Cc: linux-samsung-soc@vger.kernel.org Cc: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sam Protsenko Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220118230915.157797-3-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/platform_data/spi-s3c64xx.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 10890a4b55b9..5df1ace6d2c9 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -16,7 +16,6 @@ struct platform_device; * struct s3c64xx_spi_csinfo - ChipSelect description * @fb_delay: Slave specific feedback delay. * Refer to FB_CLK_SEL register definition in SPI chapter. - * @line: Custom 'identity' of the CS line. * * This is per SPI-Slave Chipselect information. * Allocate and initialize one in machine init code and make the @@ -24,7 +23,6 @@ struct platform_device; */ struct s3c64xx_spi_csinfo { u8 fb_delay; - unsigned line; }; /** -- cgit v1.2.3 From 7f2a3cf4e6077a1525092f114be7819e505773a1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 01:09:14 +0100 Subject: spi: s3c24xx: Convert to GPIO descriptors This driver has a bunch of custom oldstyle GPIO number-passing fields and a custom set-up callback. The good thing is: nothing in the kernel is using it. Convert the driver to use GPIO descriptors with a SPI_MASTER_GPIO_SS flag so that the local CS callback also get invoked as the hardware needs this. New users of this driver can provide GPIO descriptor tables like the other converted drivers. Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220119000914.192553-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/spi/s3c24xx.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h index 440a71593162..9b8bb22d5b0c 100644 --- a/include/linux/spi/s3c24xx.h +++ b/include/linux/spi/s3c24xx.h @@ -10,14 +10,9 @@ #define __LINUX_SPI_S3C24XX_H __FILE__ struct s3c2410_spi_info { - int pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ int bus_num; /* bus number to use. 
*/ - unsigned int use_fiq:1; /* use fiq */ - - void (*gpio_setup)(struct s3c2410_spi_info *spi, int enable); - void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on); -- cgit v1.2.3 From 5ca2ab4598179a2690a38420f3fde9f2ad79d55c Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 20 Jan 2022 13:58:32 -0600 Subject: ASoC: simple-card-utils: Add new system-clock-fixed flag Add a new system-clock-fixed flag, which can be used to specify that the driver cannot or should not allow the clock frequency of the mapped clock to be modified. This behavior is also implied if the system-clock-frequency parameter is set explicitly - the flag is meant for cases where a clock is mapped to the DAI but which is, or should be treated as, fixed. When mclk-fs is also specified, this causes a PCM constraint to be added which enforces that only the corresponding valid sample rate can be used. Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220120195832.1742271-7-robert.hancock@calian.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index df430f1c2a10..5ee269c59aac 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -25,6 +25,7 @@ struct asoc_simple_dai { unsigned int tx_slot_mask; unsigned int rx_slot_mask; struct clk *clk; + bool clk_fixed; }; struct asoc_simple_data { -- cgit v1.2.3 From 2145bb687e3fdb128066872fd952f4563d89177e Mon Sep 17 00:00:00 2001 From: Daire McNamara Date: Thu, 16 Dec 2021 14:00:21 +0000 Subject: dt-bindings: clk: microchip: Add Microchip PolarFire host binding Add device tree bindings for the Microchip PolarFire system clock controller Signed-off-by: Daire McNamara Signed-off-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211216140022.16146-2-conor.dooley@microchip.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/microchip,mpfs-clock.h | 45 ++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 include/dt-bindings/clock/microchip,mpfs-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h new file mode 100644 index 000000000000..73f2a9324857 --- /dev/null +++ b/include/dt-bindings/clock/microchip,mpfs-clock.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Daire McNamara, + * Copyright (C) 2020 Microchip Technology Inc. All rights reserved. 
+ */ + +#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ +#define _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ + +#define CLK_CPU 0 +#define CLK_AXI 1 +#define CLK_AHB 2 + +#define CLK_ENVM 3 +#define CLK_MAC0 4 +#define CLK_MAC1 5 +#define CLK_MMC 6 +#define CLK_TIMER 7 +#define CLK_MMUART0 8 +#define CLK_MMUART1 9 +#define CLK_MMUART2 10 +#define CLK_MMUART3 11 +#define CLK_MMUART4 12 +#define CLK_SPI0 13 +#define CLK_SPI1 14 +#define CLK_I2C0 15 +#define CLK_I2C1 16 +#define CLK_CAN0 17 +#define CLK_CAN1 18 +#define CLK_USB 19 +#define CLK_RESERVED 20 +#define CLK_RTC 21 +#define CLK_QSPI 22 +#define CLK_GPIO0 23 +#define CLK_GPIO1 24 +#define CLK_GPIO2 25 +#define CLK_DDRC 26 +#define CLK_FIC0 27 +#define CLK_FIC1 28 +#define CLK_FIC2 29 +#define CLK_FIC3 30 +#define CLK_ATHENA 31 +#define CLK_CFM 32 + +#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */ -- cgit v1.2.3 From ea2be22f3d37b1e77fbfe46cd6536eab667107ec Mon Sep 17 00:00:00 2001 From: Zixun LI Date: Tue, 11 Jan 2022 14:20:50 +0000 Subject: clk: at91: allow setting PMC_AUDIOPINCK clock parents via DT Make AUDIOPINCK accessible via phandle to select it as peripheral clock parent using assigned-clock-parents in DT where available. Signed-off-by: Zixun LI Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220111142051.37957-1-admin@hifiphile.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/at91.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 98e1b2ab6403..573cf8c25eb4 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -24,6 +24,7 @@ #define PMC_PLLACK 7 #define PMC_PLLBCK 8 #define PMC_AUDIOPLLCK 9 +#define PMC_AUDIOPINCK 10 /* SAMA7G5 */ #define PMC_CPUPLL (PMC_MAIN + 1) -- cgit v1.2.3 From a5ab04af49434aef532bf6cd4baa08a13665d608 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 11 Jan 2022 14:53:10 +0200 Subject: clk: at91: sama7g5: Allow MCK1 to be exported and referenced in DT MCK1 feeds the External Bus Interface (EBI). EBI's clock rate is used to translate EBI's timmings to SMC timings, thus we need to handle MCK1 in the EBI driver. Allow MCK1 to be referenced as a PMC_TYPE_CORE clock from phandle in DT. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220111125310.902856-1-tudor.ambarus@microchip.com Reviewed-by: Claudiu Beznea Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/at91.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 573cf8c25eb4..3e3972a814c1 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -36,6 +36,7 @@ #define PMC_AUDIOIOPLL (PMC_MAIN + 7) #define PMC_ETHPLL (PMC_MAIN + 8) #define PMC_CPU (PMC_MAIN + 9) +#define PMC_MCK1 (PMC_MAIN + 10) #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ -- cgit v1.2.3 From d5ebaa7c5f6f688959e8d40840b2249ede63b8ed Mon Sep 17 00:00:00 2001 From: Soenke Huster Date: Sun, 23 Jan 2022 15:06:24 +0100 Subject: Bluetooth: hci_event: Ignore multiple conn complete events When one of the three connection complete events is received multiple times for the same handle, the device is registered multiple times which leads to memory corruptions. Therefore, consequent events for a single connection are ignored. The conn->state can hold different values, therefore HCI_CONN_HANDLE_UNSET is introduced to identify new connections. 
To make sure the events do not contain this or another invalid handle HCI_CONN_HANDLE_MAX and checks are introduced. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=215497 Signed-off-by: Soenke Huster Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 21eadb113a31..f5caff1ddb29 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -303,6 +303,9 @@ struct adv_monitor { #define HCI_MAX_SHORT_NAME_LENGTH 10 +#define HCI_CONN_HANDLE_UNSET 0xffff +#define HCI_CONN_HANDLE_MAX 0x0eff + /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 -- cgit v1.2.3 From 376040e47334c6dc6a939a32197acceb00fe4acf Mon Sep 17 00:00:00 2001 From: Kenny Yu Date: Mon, 24 Jan 2022 10:54:01 -0800 Subject: bpf: Add bpf_copy_from_user_task() helper This adds a helper for bpf programs to read the memory of other tasks. As an example use case at Meta, we are using a bpf task iterator program and this new helper to print C++ async stack traces for all threads of a given process. Signed-off-by: Kenny Yu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220124185403.468466-3-kennyyu@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8c92c974bd12..394305a5e02f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2243,6 +2243,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; +extern const struct bpf_func_proto bpf_copy_from_user_task_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 16a7574292a5..4a2f7041ebae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From d507204d3c5cc57d9a8bdf0a477615bb59ea1611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:52 -0800 Subject: tcp/dccp: add tw->tw_bslot We want to allow inet_twsk_kill() working even if netns has been dismantled/freed, to get rid of inet_twsk_purge(). 
This patch adds tw->tw_bslot to cache the bind bucket slot so that inet_twsk_kill() no longer needs to dereference twsk_net(tw) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index dfd919b3119e..c221fe2b77dd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -72,6 +72,7 @@ struct inet_timewait_sock { tw_tos : 8; u32 tw_txhash; u32 tw_priority; + u32 tw_bslot; /* bind bucket slot */ struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; -- cgit v1.2.3 From 27dd35e02235902933626469a1c9198612d72564 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:53 -0800 Subject: tcp/dccp: no longer use twsk_net(tw) from tw_timer_handler() We will soon get rid of inet_twsk_purge(). This means that tw_timer_handler() might fire after a netns has been dismantled/freed. Instead of adding a function (and data structure) to find a netns from tw->tw_net_cookie, just update the SNMP counters a bit earlier, when the netns is known to be alive. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c221fe2b77dd..b323db969b8b 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -65,10 +65,9 @@ struct inet_timewait_sock { /* these three are in inet_sock */ __be16 tw_sport; /* And these are ours. */ - unsigned int tw_kill : 1, - tw_transparent : 1, + unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 2, /* 2 bits hole */ + tw_pad : 3, /* 3 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; -- cgit v1.2.3 From 0dad4087a86a2cbe177404dc73f18ada26a2c390 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:54 -0800 Subject: tcp/dccp: get rid of inet_twsk_purge() Prior patches in the series made sure tw_timer_handler() can be fired after netns has been dismantled/freed. We no longer have to scan a potentially big TCP ehash table at netns dismantle. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b323db969b8b..463ae5d33eb0 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); - static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) { -- cgit v1.2.3 From a15c89c703d43490ea68ea4516553d4ea4f6b1e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:55 -0800 Subject: ipv4: do not use per netns icmp sockets Back in linux-2.6.25 (commit 4a6ad7a141cb "[NETNS]: Make icmp_sk per namespace."), we added private per-cpu/per-netns ipv4 icmp sockets. This adds memory and cpu costs, which do not seem needed. Now typical servers have 256 or more cores, this adds considerable tax to netns users. icmp sockets are used from BH context, are not receiving packets, and do not store any persistent state but the 'struct net' pointer. 
icmp_xmit_lock() already makes sure to lock the chosen per-cpu socket. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 78557643526e..639a31638159 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -70,7 +70,6 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock * __percpu *icmp_sk; struct sock *mc_autojoin_sk; struct inet_peer_base *peers; -- cgit v1.2.3 From 6a17b961ec19cd61ca646a6655ab93e8f6fe15c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:56 -0800 Subject: ipv6: do not use per netns icmp sockets Back in linux-2.6.25 (commit 98c6d1b261e7 "[NETNS]: Make icmpv6_sk per namespace.", we added private per-cpu/per-netns ipv6 icmp sockets. This adds memory and cpu costs, which do not seem needed. Now typical servers have 256 or more cores, this adds considerable tax to netns users. icmp sockets are used from BH context, are not receiving packets, and do not store any persistent state but the 'struct net' pointer. icmpv6_xmit_lock() already makes sure to lock the chosen per-cpu socket. This patch has a considerable impact on the number of netns that the worker thread in cleanup_net() can dismantle per second, because ip6mr_sk_done() is no longer called, meaning we no longer acquire the rtnl mutex, competing with other threads adding new netns. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index a4b550380316..30cdfc4e1615 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -88,7 +88,6 @@ struct netns_ipv6 { struct fib6_table *fib6_local_tbl; struct fib_rules_ops *fib6_rules_ops; #endif - struct sock * __percpu *icmp_sk; struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; -- cgit v1.2.3 From 37ba017dcc3b1123206808979834655ddcf93251 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:57 -0800 Subject: ipv4/tcp: do not use per netns ctl sockets TCP ipv4 uses per-cpu/per-netns ctl sockets in order to send RST and some ACK packets (on behalf of TIMEWAIT sockets). This adds memory and cpu costs, which do not seem needed. Now typical servers have 256 or more cores, this adds considerable tax to netns users. tcp sockets are used from BH context, are not receiving packets, and do not store any persistent state but the 'struct net' pointer in order to be able to use IPv4 output functions. Note that I attempted a related change in the past, that had to be hot-fixed in commit bdbbb8527b6f ("ipv4: tcp: get rid of ugly unicast_sock") This patch could very well surface old bugs, on layers not taking care of sk->sk_kern_sock properly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 639a31638159..22b4c6df1d2b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -73,7 +73,6 @@ struct netns_ipv4 { struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; struct fqdir *fqdir; u8 sysctl_icmp_echo_ignore_all; -- cgit v1.2.3 From ae57857b9b6341096ddfd9c0cf26fb640c561160 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 11 Jan 2022 16:55:25 +0100 Subject: ACPICA: Use uintptr_t and offsetof() in Linux kernel builds To avoid "performing pointer subtraction with a null pointer has undefined behavior" compiler warnings, use uintptr_t and offsetof() that are always available during Linux kernel builds to define acpi_uintptr_t and the ACPI_TO_INTEGER() and ACPI_OFFSET() macros. Based on earlier proposal from Arnd Bergmann. Link: https://lore.kernel.org/linux-acpi/20210927121338.938994-1-arnd@kernel.org Signed-off-by: Rafael J. Wysocki Reviewed-by: Arnd Bergmann --- include/acpi/actypes.h | 4 ++++ include/acpi/platform/aclinux.h | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 69e89d572b9e..02c1fa16e638 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -507,8 +507,12 @@ typedef u64 acpi_integer; /* Pointer/Integer type conversions */ #define ACPI_TO_POINTER(i) ACPI_CAST_PTR (void, (acpi_size) (i)) +#ifndef ACPI_TO_INTEGER #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) 0) +#endif +#ifndef ACPI_OFFSET #define ACPI_OFFSET(d, f) ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0) +#endif #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) /* Optimizations for 4-character (32-bit) acpi_name manipulation */ diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index b3ffb9bbf664..cec41e004ecf 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -114,6 +114,11 @@ #define acpi_raw_spinlock raw_spinlock_t * #define acpi_cpu_flags unsigned long +#define acpi_uintptr_t uintptr_t + +#define ACPI_TO_INTEGER(p) ((uintptr_t)(p)) +#define ACPI_OFFSET(d, f) offsetof(d, f) + /* Use native linux version of acpi_os_allocate_zeroed */ #define USE_NATIVE_ALLOCATE_ZEROED -- cgit v1.2.3 From 4fdacef8ac5a5382eeb1bc6fc2632d71a09d52cd Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 13 Jan 2022 08:59:27 -0800 Subject: kunit: move check if assertion passed into the macros Currently the code always calls kunit_do_assertion() even though it does nothing when `pass` is true. This change moves the `if(!(pass))` check into the macro instead and renames the function to kunit_do_failed_assertion(). I feel this a bit easier to read and understand. This has the potential upside of avoiding a function call that does nothing most of the time (assuming your tests are passing) but comes with the downside of generating a bit more code and branches. We try to mitigate the branches by tagging them with `unlikely()`. This also means we don't have to initialize structs that we don't need, which will become a tiny bit more expensive if we switch over to using static variables to try and reduce stack usage. (There's runtime code to check if the variable has been initialized yet or not). 
Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index b26400731c02..12cabd15449a 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -770,18 +771,18 @@ void __printf(2, 3) kunit_log_append(char *log, const char *fmt, ...); */ #define KUNIT_SUCCEED(test) do {} while (0) -void kunit_do_assertion(struct kunit *test, - struct kunit_assert *assert, - bool pass, - const char *fmt, ...); +void kunit_do_failed_assertion(struct kunit *test, + struct kunit_assert *assert, + const char *fmt, ...); #define KUNIT_ASSERTION(test, pass, assert_class, INITIALIZER, fmt, ...) do { \ - struct assert_class __assertion = INITIALIZER; \ - kunit_do_assertion(test, \ - &__assertion.assert, \ - pass, \ - fmt, \ - ##__VA_ARGS__); \ + if (unlikely(!(pass))) { \ + struct assert_class __assertion = INITIALIZER; \ + kunit_do_failed_assertion(test, \ + &__assertion.assert, \ + fmt, \ + ##__VA_ARGS__); \ + } \ } while (0) -- cgit v1.2.3 From a91e9ade402c48ee35cd1e4b671e3938db798d17 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 13 Jan 2022 08:59:28 -0800 Subject: kunit: drop unused kunit* field in kunit_assert The `struct kunit* test` field in kunit_assert is unused. Note: string_stream needs it, but it has its own `test` field. I assume `test` in `kunit_assert` predates this and was leftover after some refactoring. This patch removes the field and cleans up the macros to avoid needlessly passing around `test`. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/assert.h | 45 +++++++++++++-------------------------------- include/kunit/test.h | 14 +++++--------- 2 files changed, 18 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index ccbc36c0b02f..f568166ef034 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -30,7 +30,6 @@ enum kunit_assert_type { /** * struct kunit_assert - Data for printing a failed assertion or expectation. - * @test: the test case this expectation/assertion is associated with. * @type: the type (either an expectation or an assertion) of this kunit_assert. * @line: the source code line number that the expectation/assertion is at. * @file: the file path of the source file that the expectation/assertion is in. @@ -41,7 +40,6 @@ enum kunit_assert_type { * format a string to a user reporting the failure. */ struct kunit_assert { - struct kunit *test; enum kunit_assert_type type; int line; const char *file; @@ -60,14 +58,12 @@ struct kunit_assert { /** * KUNIT_INIT_ASSERT_STRUCT() - Initializer for a &struct kunit_assert. - * @kunit: The test case that this expectation/assertion is associated with. * @assert_type: The type (assertion or expectation) of this kunit_assert. * @fmt: The formatting function which builds a string out of this kunit_assert. * * The base initializer for a &struct kunit_assert. 
*/ -#define KUNIT_INIT_ASSERT_STRUCT(kunit, assert_type, fmt) { \ - .test = kunit, \ +#define KUNIT_INIT_ASSERT_STRUCT(assert_type, fmt) { \ .type = assert_type, \ .file = __FILE__, \ .line = __LINE__, \ @@ -96,15 +92,13 @@ void kunit_fail_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_FAIL_ASSERT_STRUCT() - Initializer for &struct kunit_fail_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * * Initializes a &struct kunit_fail_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_FAIL_ASSERT_STRUCT(test, type) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_FAIL_ASSERT_STRUCT(type) { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_fail_assert_format) \ } @@ -129,7 +123,6 @@ void kunit_unary_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_UNARY_ASSERT_STRUCT() - Initializes &struct kunit_unary_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @cond: A string representation of the expression asserted true or false. * @expect_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise. @@ -137,9 +130,8 @@ void kunit_unary_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_unary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_UNARY_ASSERT_STRUCT(test, type, cond, expect_true) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_UNARY_ASSERT_STRUCT(type, cond, expect_true) { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_unary_assert_format), \ .condition = cond, \ .expected_true = expect_true \ @@ -167,7 +159,6 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_PTR_NOT_ERR_ASSERT_STRUCT() - Initializes a * &struct kunit_ptr_not_err_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @txt: A string representation of the expression passed to the expectation. * @val: The actual evaluated pointer value of the expression. @@ -175,9 +166,8 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_ptr_not_err_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, type, txt, val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_PTR_NOT_ERR_STRUCT(type, txt, val) { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_ptr_not_err_assert_format), \ .text = txt, \ .value = val \ @@ -212,7 +202,6 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. @@ -223,15 +212,13 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ -#define KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_BINARY_ASSERT_STRUCT(type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_binary_assert_format), \ .operation = op_str, \ .left_text = left_str, \ @@ -269,7 +256,6 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_ptr_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. @@ -280,15 +266,13 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_ptr_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_binary_ptr_assert_format), \ .operation = op_str, \ .left_text = left_str, \ @@ -326,7 +310,6 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_str_assert. - * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. @@ -337,15 +320,13 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_str_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ -#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ - type, \ +#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ - type, \ + .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ kunit_binary_str_assert_format), \ .operation = op_str, \ .left_text = left_str, \ diff --git a/include/kunit/test.h b/include/kunit/test.h index 12cabd15449a..25ea3bce6663 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -790,7 +790,7 @@ void kunit_do_failed_assertion(struct kunit *test, KUNIT_ASSERTION(test, \ false, \ kunit_fail_assert, \ - KUNIT_INIT_FAIL_ASSERT_STRUCT(test, assert_type), \ + KUNIT_INIT_FAIL_ASSERT_STRUCT(assert_type), \ fmt, \ ##__VA_ARGS__) @@ -820,8 +820,7 @@ void kunit_do_failed_assertion(struct kunit *test, KUNIT_ASSERTION(test, \ !!(condition) == !!expected_true, \ kunit_unary_assert, \ - KUNIT_INIT_UNARY_ASSERT_STRUCT(test, \ - assert_type, \ + KUNIT_INIT_UNARY_ASSERT_STRUCT(assert_type, \ #condition, \ expected_true), \ fmt, \ @@ -879,8 +878,7 @@ do { \ KUNIT_ASSERTION(test, \ __left op __right, \ assert_class, \ - ASSERT_CLASS_INIT(test, \ - assert_type, \ + ASSERT_CLASS_INIT(assert_type, \ #op, \ #left, \ __left, \ @@ -1234,8 +1232,7 @@ do { \ KUNIT_ASSERTION(test, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ - assert_type, \ + KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(assert_type, \ #op, \ #left, \ __left, \ @@ -1294,8 +1291,7 @@ do { \ KUNIT_ASSERTION(test, \ !IS_ERR_OR_NULL(__ptr), \ kunit_ptr_not_err_assert, \ - KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, \ - assert_type, \ + KUNIT_INIT_PTR_NOT_ERR_STRUCT(assert_type, \ #ptr, \ __ptr), \ fmt, \ -- cgit v1.2.3 From 21957f90b28f6bc118c055e3e564d45f6e4df45d Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 13 Jan 2022 08:59:30 -0800 Subject: kunit: split out part of kunit_assert into a static const This is per Linus's suggestion in [1]. The issue there is that every KUNIT_EXPECT/KUNIT_ASSERT puts a kunit_assert object onto the stack. Normally we rely on compilers to elide this, but when that doesn't work out, this blows up the stack usage of kunit test functions. We can move some data off the stack by making it static. This change introduces a new `struct kunit_loc` to hold the file and line number and then just passing assert_type (EXPECT or ASSERT) as an argument. In [1], it was suggested to also move out the format string as well, but users could theoretically craft a format string at runtime, so we can't. This change leaves a copy of `assert_type` in kunit_assert for now because cleaning up all the macros to not pass it around is a bit more involved. Here's an example of the expanded code for KUNIT_FAIL(): if (__builtin_expect(!!(!(false)), 0)) { static const struct kunit_loc loc = { .file = ... }; struct kunit_fail_assert __assertion = { .assert = { .type ... 
}; kunit_do_failed_assertion(test, &loc, KUNIT_EXPECTATION, &__assertion.assert, ...); }; [1] https://groups.google.com/g/kunit-dev/c/i3fZXgvBrfA/m/VULQg1z6BAAJ Signed-off-by: Daniel Latypov Suggested-by: Linus Torvalds Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/assert.h | 25 +++++++++++++++++-------- include/kunit/test.h | 12 +++++++++++- 2 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index f568166ef034..0da1bbdd1ee8 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -28,11 +28,21 @@ enum kunit_assert_type { KUNIT_EXPECTATION, }; +/** + * struct kunit_loc - Identifies the source location of a line of code. + * @line: the line number in the file. + * @file: the file name. + */ +struct kunit_loc { + int line; + const char *file; +}; + +#define KUNIT_CURRENT_LOC { .file = __FILE__, .line = __LINE__ } + /** * struct kunit_assert - Data for printing a failed assertion or expectation. * @type: the type (either an expectation or an assertion) of this kunit_assert. - * @line: the source code line number that the expectation/assertion is at. - * @file: the file path of the source file that the expectation/assertion is in. * @message: an optional message to provide additional context. * @format: a function which formats the data in this kunit_assert to a string. * @@ -40,9 +50,9 @@ enum kunit_assert_type { * format a string to a user reporting the failure. */ struct kunit_assert { + // TODO(dlatypov@google.com): delete this unused field when we've + // updated all the related KUNIT_INIT_ASSERT* macros. enum kunit_assert_type type; - int line; - const char *file; struct va_format message; void (*format)(const struct kunit_assert *assert, struct string_stream *stream); @@ -65,14 +75,13 @@ struct kunit_assert { */ #define KUNIT_INIT_ASSERT_STRUCT(assert_type, fmt) { \ .type = assert_type, \ - .file = __FILE__, \ - .line = __LINE__, \ .message = KUNIT_INIT_VA_FMT_NULL, \ .format = fmt \ } -void kunit_base_assert_format(const struct kunit_assert *assert, - struct string_stream *stream); +void kunit_assert_prologue(const struct kunit_loc *loc, + enum kunit_assert_type type, + struct string_stream *stream); void kunit_assert_print_msg(const struct kunit_assert *assert, struct string_stream *stream); diff --git a/include/kunit/test.h b/include/kunit/test.h index 25ea3bce6663..7b752175e614 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -772,13 +772,18 @@ void __printf(2, 3) kunit_log_append(char *log, const char *fmt, ...); #define KUNIT_SUCCEED(test) do {} while (0) void kunit_do_failed_assertion(struct kunit *test, + const struct kunit_loc *loc, + enum kunit_assert_type type, struct kunit_assert *assert, const char *fmt, ...); -#define KUNIT_ASSERTION(test, pass, assert_class, INITIALIZER, fmt, ...) do { \ +#define KUNIT_ASSERTION(test, assert_type, pass, assert_class, INITIALIZER, fmt, ...) do { \ if (unlikely(!(pass))) { \ + static const struct kunit_loc loc = KUNIT_CURRENT_LOC; \ struct assert_class __assertion = INITIALIZER; \ kunit_do_failed_assertion(test, \ + &loc, \ + assert_type, \ &__assertion.assert, \ fmt, \ ##__VA_ARGS__); \ @@ -788,6 +793,7 @@ void kunit_do_failed_assertion(struct kunit *test, #define KUNIT_FAIL_ASSERTION(test, assert_type, fmt, ...) 
\ KUNIT_ASSERTION(test, \ + assert_type, \ false, \ kunit_fail_assert, \ KUNIT_INIT_FAIL_ASSERT_STRUCT(assert_type), \ @@ -818,6 +824,7 @@ void kunit_do_failed_assertion(struct kunit *test, fmt, \ ...) \ KUNIT_ASSERTION(test, \ + assert_type, \ !!(condition) == !!expected_true, \ kunit_unary_assert, \ KUNIT_INIT_UNARY_ASSERT_STRUCT(assert_type, \ @@ -876,6 +883,7 @@ do { \ typeof(right) __right = (right); \ \ KUNIT_ASSERTION(test, \ + assert_type, \ __left op __right, \ assert_class, \ ASSERT_CLASS_INIT(assert_type, \ @@ -1230,6 +1238,7 @@ do { \ const char *__right = (right); \ \ KUNIT_ASSERTION(test, \ + assert_type, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(assert_type, \ @@ -1289,6 +1298,7 @@ do { \ typeof(ptr) __ptr = (ptr); \ \ KUNIT_ASSERTION(test, \ + assert_type, \ !IS_ERR_OR_NULL(__ptr), \ kunit_ptr_not_err_assert, \ KUNIT_INIT_PTR_NOT_ERR_STRUCT(assert_type, \ -- cgit v1.2.3 From 05a7da89c15ddb3fc7618a16f5941eca68fc441c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 13 Jan 2022 08:59:31 -0800 Subject: kunit: drop unused assert_type from kunit_assert and clean up macros This field has been split out from kunit_assert to make the struct less heavy along with the filename and line number. This change drops the assert_type field and cleans up all the macros that were plumbing assert_type into kunit_assert. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/assert.h | 48 ++++++++++++++---------------------------------- include/kunit/test.h | 14 +++++--------- 2 files changed, 19 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 0da1bbdd1ee8..f2b3ae5cc2de 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -42,7 +42,6 @@ struct kunit_loc { /** * struct kunit_assert - Data for printing a failed assertion or expectation. - * @type: the type (either an expectation or an assertion) of this kunit_assert. * @message: an optional message to provide additional context. * @format: a function which formats the data in this kunit_assert to a string. * @@ -50,9 +49,6 @@ struct kunit_loc { * format a string to a user reporting the failure. */ struct kunit_assert { - // TODO(dlatypov@google.com): delete this unused field when we've - // updated all the related KUNIT_INIT_ASSERT* macros. - enum kunit_assert_type type; struct va_format message; void (*format)(const struct kunit_assert *assert, struct string_stream *stream); @@ -68,13 +64,11 @@ struct kunit_assert { /** * KUNIT_INIT_ASSERT_STRUCT() - Initializer for a &struct kunit_assert. - * @assert_type: The type (assertion or expectation) of this kunit_assert. * @fmt: The formatting function which builds a string out of this kunit_assert. * * The base initializer for a &struct kunit_assert. */ -#define KUNIT_INIT_ASSERT_STRUCT(assert_type, fmt) { \ - .type = assert_type, \ +#define KUNIT_INIT_ASSERT_STRUCT(fmt) { \ .message = KUNIT_INIT_VA_FMT_NULL, \ .format = fmt \ } @@ -100,15 +94,13 @@ void kunit_fail_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** - * KUNIT_INIT_FAIL_ASSERT_STRUCT() - Initializer for &struct kunit_fail_assert. - * @type: The type (assertion or expectation) of this kunit_assert. + * KUNIT_INIT_FAIL_ASSERT_STRUCT - Initializer for &struct kunit_fail_assert. * * Initializes a &struct kunit_fail_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ -#define KUNIT_INIT_FAIL_ASSERT_STRUCT(type) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_fail_assert_format) \ +#define KUNIT_INIT_FAIL_ASSERT_STRUCT { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_fail_assert_format) \ } /** @@ -132,16 +124,14 @@ void kunit_unary_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_UNARY_ASSERT_STRUCT() - Initializes &struct kunit_unary_assert. - * @type: The type (assertion or expectation) of this kunit_assert. * @cond: A string representation of the expression asserted true or false. * @expect_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise. * * Initializes a &struct kunit_unary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_UNARY_ASSERT_STRUCT(type, cond, expect_true) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_unary_assert_format), \ +#define KUNIT_INIT_UNARY_ASSERT_STRUCT(cond, expect_true) { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_unary_assert_format), \ .condition = cond, \ .expected_true = expect_true \ } @@ -168,16 +158,14 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_PTR_NOT_ERR_ASSERT_STRUCT() - Initializes a * &struct kunit_ptr_not_err_assert. - * @type: The type (assertion or expectation) of this kunit_assert. * @txt: A string representation of the expression passed to the expectation. * @val: The actual evaluated pointer value of the expression. * * Initializes a &struct kunit_ptr_not_err_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_PTR_NOT_ERR_STRUCT(type, txt, val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_ptr_not_err_assert_format), \ +#define KUNIT_INIT_PTR_NOT_ERR_STRUCT(txt, val) { \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_ptr_not_err_assert_format), \ .text = txt, \ .value = val \ } @@ -211,7 +199,6 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_assert. - * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. * @left_val: The actual evaluated value of the expression in the left slot. @@ -221,14 +208,12 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_BINARY_ASSERT_STRUCT(type, \ - op_str, \ +#define KUNIT_INIT_BINARY_ASSERT_STRUCT(op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_binary_assert_format), \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ @@ -275,14 +260,12 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_ptr_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ -#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(type, \ - op_str, \ +#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_binary_ptr_assert_format), \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_ptr_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ @@ -319,7 +302,6 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, /** * KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_str_assert. - * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. * @left_val: The actual evaluated value of the expression in the left slot. @@ -329,14 +311,12 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, * Initializes a &struct kunit_binary_str_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ -#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(type, \ - op_str, \ +#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(type, \ - kunit_binary_str_assert_format), \ + .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_str_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ diff --git a/include/kunit/test.h b/include/kunit/test.h index 7b752175e614..5964af750d93 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -796,7 +796,7 @@ void kunit_do_failed_assertion(struct kunit *test, assert_type, \ false, \ kunit_fail_assert, \ - KUNIT_INIT_FAIL_ASSERT_STRUCT(assert_type), \ + KUNIT_INIT_FAIL_ASSERT_STRUCT, \ fmt, \ ##__VA_ARGS__) @@ -827,8 +827,7 @@ void kunit_do_failed_assertion(struct kunit *test, assert_type, \ !!(condition) == !!expected_true, \ kunit_unary_assert, \ - KUNIT_INIT_UNARY_ASSERT_STRUCT(assert_type, \ - #condition, \ + KUNIT_INIT_UNARY_ASSERT_STRUCT(#condition, \ expected_true), \ fmt, \ ##__VA_ARGS__) @@ -886,8 +885,7 @@ do { \ assert_type, \ __left op __right, \ assert_class, \ - ASSERT_CLASS_INIT(assert_type, \ - #op, \ + ASSERT_CLASS_INIT(#op, \ #left, \ __left, \ #right, \ @@ -1241,8 +1239,7 @@ do { \ assert_type, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(assert_type, \ - #op, \ + KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(#op, \ #left, \ __left, \ #right, \ @@ -1301,8 +1298,7 @@ do { \ assert_type, \ !IS_ERR_OR_NULL(__ptr), \ kunit_ptr_not_err_assert, \ - KUNIT_INIT_PTR_NOT_ERR_STRUCT(assert_type, \ - #ptr, \ + KUNIT_INIT_PTR_NOT_ERR_STRUCT(#ptr, \ __ptr), \ fmt, \ ##__VA_ARGS__); \ -- cgit v1.2.3 From 6709d0fe55933d3ac944f3cdd5b66d9786092f90 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 18 Jan 2022 14:35:02 -0800 Subject: kunit: make KUNIT_EXPECT_EQ() use KUNIT_EXPECT_EQ_MSG(), etc. There's quite a few macros in play for KUnit assertions. 
The current macro chain looks like: KUNIT_EXPECT_EQ => KUNIT_BINARY_EQ_ASSERTION => KUNIT_BINARY_EQ_MSG_ASSERTION KUNIT_EXPECT_EQ_MSG => KUNIT_BINARY_EQ_MSG_ASSERTION KUNIT_ASSERT_EQ => KUNIT_BINARY_EQ_ASSERTION => KUNIT_BINARY_EQ_MSG_ASSERTION KUNIT_ASSERT_EQ_MSG => KUNIT_BINARY_EQ_MSG_ASSERTION After this change: KUNIT_EXPECT_EQ => KUNIT_EXPECT_EQ_MSG => KUNIT_BINARY_EQ_MSG_ASSERTION KUNIT_ASSERT_EQ => KUNIT_ASSERT_EQ_MSG => KUNIT_BINARY_EQ_MSG_ASSERTION and we can drop the intermediate KUNIT_BINARY_EQ_ASSERTION. This change does this for all the other macros as well. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 168 ++++++++------------------------------------------- 1 file changed, 26 insertions(+), 142 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 5964af750d93..b032dd6816d2 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -840,9 +840,6 @@ void kunit_do_failed_assertion(struct kunit *test, fmt, \ ##__VA_ARGS__) -#define KUNIT_TRUE_ASSERTION(test, assert_type, condition) \ - KUNIT_TRUE_MSG_ASSERTION(test, assert_type, condition, NULL) - #define KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, fmt, ...) \ KUNIT_UNARY_ASSERTION(test, \ assert_type, \ @@ -851,9 +848,6 @@ void kunit_do_failed_assertion(struct kunit *test, fmt, \ ##__VA_ARGS__) -#define KUNIT_FALSE_ASSERTION(test, assert_type, condition) \ - KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, NULL) - /* * A factory macro for defining the assertions and expectations for the basic * comparisons defined for the built in types. @@ -1000,13 +994,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_EQ_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1022,13 +1009,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_EQ_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_NE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_NE_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1039,13 +1019,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_NE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_NE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1061,13 +1034,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_NE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_LT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_LT_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1078,13 +1044,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_LT_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_LT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1100,13 +1059,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_LT_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_LE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ 
KUNIT_BASE_LE_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1117,13 +1069,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_LE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_LE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1139,13 +1084,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_LE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_GT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GT_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1156,13 +1094,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_GT_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_GT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1178,13 +1109,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_GT_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_GE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GE_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1195,13 +1119,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_GE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_GE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1217,13 +1134,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_GE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, \ @@ -1260,13 +1170,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_STR_EQ_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ @@ -1279,13 +1182,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_STR_NE_ASSERTION(test, assert_type, left, right) \ - KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - NULL) - #define KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ assert_type, \ ptr, \ @@ -1304,12 +1200,6 @@ do { \ ##__VA_ARGS__); \ } while (0) -#define KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, assert_type, ptr) \ - KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ - assert_type, \ - ptr, \ - NULL) - /** * KUNIT_EXPECT_TRUE() - Causes a test failure when the expression is not true. * @test: The test context object. @@ -1322,7 +1212,7 @@ do { \ * *expectation failure*. */ #define KUNIT_EXPECT_TRUE(test, condition) \ - KUNIT_TRUE_ASSERTION(test, KUNIT_EXPECTATION, condition) + KUNIT_EXPECT_TRUE_MSG(test, condition, NULL) #define KUNIT_EXPECT_TRUE_MSG(test, condition, fmt, ...) \ KUNIT_TRUE_MSG_ASSERTION(test, \ @@ -1341,7 +1231,7 @@ do { \ * KUNIT_EXPECT_TRUE() for more information. */ #define KUNIT_EXPECT_FALSE(test, condition) \ - KUNIT_FALSE_ASSERTION(test, KUNIT_EXPECTATION, condition) + KUNIT_EXPECT_FALSE_MSG(test, condition, NULL) #define KUNIT_EXPECT_FALSE_MSG(test, condition, fmt, ...) \ KUNIT_FALSE_MSG_ASSERTION(test, \ @@ -1362,7 +1252,7 @@ do { \ * more information. 
*/ #define KUNIT_EXPECT_EQ(test, left, right) \ - KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_EQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ @@ -1384,10 +1274,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_PTR_EQ(test, left, right) \ - KUNIT_BINARY_PTR_EQ_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right) + KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ @@ -1409,7 +1296,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_NE(test, left, right) \ - KUNIT_BINARY_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_NE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_NE_MSG_ASSERTION(test, \ @@ -1431,10 +1318,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_PTR_NE(test, left, right) \ - KUNIT_BINARY_PTR_NE_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right) + KUNIT_EXPECT_PTR_NE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_PTR_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ @@ -1456,7 +1340,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_LT(test, left, right) \ - KUNIT_BINARY_LT_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_LT_MSG(test, left, right, NULL) #define KUNIT_EXPECT_LT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LT_MSG_ASSERTION(test, \ @@ -1478,7 +1362,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_LE(test, left, right) \ - KUNIT_BINARY_LE_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_LE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_LE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LE_MSG_ASSERTION(test, \ @@ -1500,7 +1384,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_GT(test, left, right) \ - KUNIT_BINARY_GT_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_GT_MSG(test, left, right, NULL) #define KUNIT_EXPECT_GT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GT_MSG_ASSERTION(test, \ @@ -1522,7 +1406,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_GE(test, left, right) \ - KUNIT_BINARY_GE_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_GE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_GE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GE_MSG_ASSERTION(test, \ @@ -1544,7 +1428,7 @@ do { \ * for more information. */ #define KUNIT_EXPECT_STREQ(test, left, right) \ - KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_STREQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_STREQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ @@ -1566,7 +1450,7 @@ do { \ * for more information. */ #define KUNIT_EXPECT_STRNEQ(test, left, right) \ - KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right) + KUNIT_EXPECT_STRNEQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_STRNEQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ @@ -1587,7 +1471,7 @@ do { \ * more information. */ #define KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ptr) \ - KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_EXPECTATION, ptr) + KUNIT_EXPECT_NOT_ERR_OR_NULL_MSG(test, ptr, NULL) #define KUNIT_EXPECT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ @@ -1611,7 +1495,7 @@ do { \ * this is otherwise known as an *assertion failure*. 
*/ #define KUNIT_ASSERT_TRUE(test, condition) \ - KUNIT_TRUE_ASSERTION(test, KUNIT_ASSERTION, condition) + KUNIT_ASSERT_TRUE_MSG(test, condition, NULL) #define KUNIT_ASSERT_TRUE_MSG(test, condition, fmt, ...) \ KUNIT_TRUE_MSG_ASSERTION(test, \ @@ -1630,7 +1514,7 @@ do { \ * (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_FALSE(test, condition) \ - KUNIT_FALSE_ASSERTION(test, KUNIT_ASSERTION, condition) + KUNIT_ASSERT_FALSE_MSG(test, condition, NULL) #define KUNIT_ASSERT_FALSE_MSG(test, condition, fmt, ...) \ KUNIT_FALSE_MSG_ASSERTION(test, \ @@ -1650,7 +1534,7 @@ do { \ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_EQ(test, left, right) \ - KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_EQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ @@ -1671,7 +1555,7 @@ do { \ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_PTR_EQ(test, left, right) \ - KUNIT_BINARY_PTR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ @@ -1692,7 +1576,7 @@ do { \ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_NE(test, left, right) \ - KUNIT_BINARY_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_NE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_NE_MSG_ASSERTION(test, \ @@ -1714,7 +1598,7 @@ do { \ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_PTR_NE(test, left, right) \ - KUNIT_BINARY_PTR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_PTR_NE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_PTR_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ @@ -1735,7 +1619,7 @@ do { \ * is not met. */ #define KUNIT_ASSERT_LT(test, left, right) \ - KUNIT_BINARY_LT_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_LT_MSG(test, left, right, NULL) #define KUNIT_ASSERT_LT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LT_MSG_ASSERTION(test, \ @@ -1756,7 +1640,7 @@ do { \ * KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_LE(test, left, right) \ - KUNIT_BINARY_LE_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_LE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_LE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LE_MSG_ASSERTION(test, \ @@ -1778,7 +1662,7 @@ do { \ * is not met. */ #define KUNIT_ASSERT_GT(test, left, right) \ - KUNIT_BINARY_GT_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_GT_MSG(test, left, right, NULL) #define KUNIT_ASSERT_GT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GT_MSG_ASSERTION(test, \ @@ -1800,7 +1684,7 @@ do { \ * is not met. */ #define KUNIT_ASSERT_GE(test, left, right) \ - KUNIT_BINARY_GE_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_GE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_GE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GE_MSG_ASSERTION(test, \ @@ -1821,7 +1705,7 @@ do { \ * assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. 
*/ #define KUNIT_ASSERT_STREQ(test, left, right) \ - KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_STREQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_STREQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ @@ -1843,7 +1727,7 @@ do { \ * for more information. */ #define KUNIT_ASSERT_STRNEQ(test, left, right) \ - KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) + KUNIT_ASSERT_STRNEQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_STRNEQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ @@ -1864,7 +1748,7 @@ do { \ * KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr) \ - KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_ASSERTION, ptr) + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, NULL) #define KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ -- cgit v1.2.3 From c5855907d388321c1089023489e49ba9a5e9afc7 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 18 Jan 2022 14:35:03 -0800 Subject: kunit: drop unused intermediate macros for ptr inequality checks We have the intermediate macros for KUNIT_EXPECT_PTR_GT() and friends, but these macros don't exist. I can see niche usecases for these macros existing, but since we've been fine without them for so long, let's drop this dead code. Users can instead cast the pointers and use the other GT/LT macros. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 60 ---------------------------------------------------- 1 file changed, 60 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index b032dd6816d2..c021945a75e3 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1044,21 +1044,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_LT_MSG_ASSERTION(test, \ - kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_LE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_LE_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1069,21 +1054,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_LE_MSG_ASSERTION(test, \ - kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_GT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GT_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1094,21 +1064,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_GT_MSG_ASSERTION(test, \ - kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_GE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GE_MSG_ASSERTION(test, \ kunit_binary_assert, \ @@ -1119,21 +1074,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) 
\ - KUNIT_BASE_GE_MSG_ASSERTION(test, \ - kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, \ -- cgit v1.2.3 From 955df7d85e58b8090f1fd2d10b4b2713e99b552c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 18 Jan 2022 14:35:04 -0800 Subject: kunit: reduce layering in string assertion macros The current macro chain looks like: KUNIT_EXPECT_STREQ => KUNIT_EXPECT_STREQ_MSG => KUNIT_BINARY_STR_EQ_MSG_ASSERTION => KUNIT_BINARY_STR_ASSERTION. KUNIT_ASSERT_STREQ => KUNIT_ASSERT_STREQ_MSG => KUNIT_BINARY_STR_EQ_MSG_ASSERTION => KUNIT_BINARY_STR_ASSERTION. After this change: KUNIT_EXPECT_STREQ => KUNIT_EXPECT_STREQ_MSG => KUNIT_BINARY_STR_ASSERTION. KUNIT_ASSERT_STREQ => KUNIT_ASSERT_STREQ_MSG => KUNIT_BINARY_STR_ASSERTION. All the intermediate macro did was pass in "==" or "!=", so it seems better to just drop them at the cost of a bit more copy-paste. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 68 ++++++++++++++++------------------------------------ 1 file changed, 20 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index c021945a75e3..d5dc1ef68bfe 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1098,30 +1098,6 @@ do { \ ##__VA_ARGS__); \ } while (0) -#define KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BINARY_STR_ASSERTION(test, \ - assert_type, \ - left, ==, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BINARY_STR_ASSERTION(test, \ - assert_type, \ - left, !=, right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ assert_type, \ ptr, \ @@ -1371,12 +1347,11 @@ do { \ KUNIT_EXPECT_STREQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_STREQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_STR_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_STRNEQ() - Expects that strings @left and @right are not equal. @@ -1393,12 +1368,11 @@ do { \ KUNIT_EXPECT_STRNEQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_STRNEQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_STR_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_NOT_ERR_OR_NULL() - Expects that @ptr is not null and not err. @@ -1648,12 +1622,11 @@ do { \ KUNIT_ASSERT_STREQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_STREQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_STR_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_STRNEQ() - Expects that strings @left and @right are not equal. @@ -1670,12 +1643,11 @@ do { \ KUNIT_ASSERT_STRNEQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_STRNEQ_MSG(test, left, right, fmt, ...) 
\ - KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_STR_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_NOT_ERR_OR_NULL() - Assertion that @ptr is not null and not err. -- cgit v1.2.3 From 40f39777ce4f8e65ca16c10f1b895bbe8306a42f Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 18 Jan 2022 14:35:05 -0800 Subject: kunit: decrease macro layering for integer asserts Introduce a KUNIT_BINARY_INT_ASSERTION for the likes of KUNIT_EXPECT_LT. This is analagous to KUNIT_BINARY_STR_ASSERTION. Note: this patch leaves the EQ/NE macros untouched since those share some intermediate macros for the pointer-based macros. The current macro chain looks like: KUNIT_EXPECT_LT_MSG => KUNIT_BASE_LT_MSG_ASSERTION => KUNIT_BASE_BINARY_ASSERTION KUNIT_EXPECT_GT_MSG => KUNIT_BASE_GT_MSG_ASSERTION => KUNIT_BASE_BINARY_ASSERTION After this change: KUNIT_EXPECT_LT_MSG => KUNIT_BINARY_INT_ASSERTION => KUNIT_BASE_BINARY_ASSERTION KUNIT_EXPECT_GT_MSG => KUNIT_BINARY_INT_ASSERTION => KUNIT_BASE_BINARY_ASSERTION I.e. we've traded all the unique intermediary macros for a single shared KUNIT_BINARY_INT_ASSERTION. The only difference is that users of KUNIT_BINARY_INT_ASSERTION also need to pass the operation (==, <, etc.). Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 199 +++++++++++++-------------------------------------- 1 file changed, 51 insertions(+), 148 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index d5dc1ef68bfe..48cf520b69ce 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -920,77 +920,28 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BASE_LT_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, <, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BASE_LE_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, <=, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BASE_GT_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ +#define KUNIT_BINARY_EQ_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ + KUNIT_BASE_EQ_MSG_ASSERTION(test, \ + kunit_binary_assert, \ + KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ - ...) \ - KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, >, right, \ - fmt, \ ##__VA_ARGS__) -#define KUNIT_BASE_GE_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, \ - right, \ - fmt, \ +#define KUNIT_BINARY_INT_ASSERTION(test, \ + assert_type, \ + left, \ + op, \ + right, \ + fmt, \ ...) 
\ KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, >=, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_EQ_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_EQ_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ - left, \ - right, \ + left, op, right, \ fmt, \ ##__VA_ARGS__) @@ -1034,46 +985,6 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_LT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_LT_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_LE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_LE_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_GT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_GT_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_GE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_GE_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, \ @@ -1259,12 +1170,11 @@ do { \ KUNIT_EXPECT_LT_MSG(test, left, right, NULL) #define KUNIT_EXPECT_LT_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_LT_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, <, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_LE() - Expects that @left is less than or equal to @right. @@ -1281,12 +1191,11 @@ do { \ KUNIT_EXPECT_LE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_LE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_LE_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, <=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_GT() - An expectation that @left is greater than @right. @@ -1303,12 +1212,11 @@ do { \ KUNIT_EXPECT_GT_MSG(test, left, right, NULL) #define KUNIT_EXPECT_GT_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_GT_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, >, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_GE() - Expects that @left is greater than or equal to @right. @@ -1325,12 +1233,11 @@ do { \ KUNIT_EXPECT_GE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_GE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_GE_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, >=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_STREQ() - Expects that strings @left and @right are equal. @@ -1536,12 +1443,11 @@ do { \ KUNIT_ASSERT_LT_MSG(test, left, right, NULL) #define KUNIT_ASSERT_LT_MSG(test, left, right, fmt, ...) 
\ - KUNIT_BINARY_LT_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, <, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_LE() - An assertion that @left is less than or equal to @right. * @test: The test context object. @@ -1557,12 +1463,11 @@ do { \ KUNIT_ASSERT_LE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_LE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_LE_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, <=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_GT() - An assertion that @left is greater than @right. @@ -1579,12 +1484,11 @@ do { \ KUNIT_ASSERT_GT_MSG(test, left, right, NULL) #define KUNIT_ASSERT_GT_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_GT_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, >, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_GE() - Assertion that @left is greater than or equal to @right. @@ -1601,12 +1505,11 @@ do { \ KUNIT_ASSERT_GE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_GE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_GE_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, >=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_STREQ() - An assertion that strings @left and @right are equal. -- cgit v1.2.3 From 6125a5c70acddd9fc1fb7329047a254c74d0173c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 18 Jan 2022 14:35:06 -0800 Subject: kunit: decrease macro layering for EQ/NE asserts Introduce KUNIT_BINARY_PTR_ASSERTION to match KUNIT_BINARY_INT_ASSERTION and make KUNIT_EXPECT_EQ and KUNIT_EXPECT_PTREQ use these instead of shared intermediate macros that only remove the need to type "==" or "!=". The current macro chain looks like: KUNIT_EXPECT_EQ_MSG => KUNIT_BINARY_EQ_MSG_ASSERTION => KUNIT_BASE_EQ_MSG_ASSERTION => KUNIT_BASE_BINARY_ASSERTION KUNIT_EXPECT_PTR_EQ_MSG => KUNIT_BINARY_PTR_EQ_MSG_ASSERTION => KUNIT_BASE_EQ_MSG_ASSERTION => KUNIT_BASE_BINARY_ASSERTION After this change: KUNIT_EXPECT_EQ_MSG => KUNIT_BINARY_INT_ASSERTION => KUNIT_BASE_BINARY_ASSERTION KUNIT_EXPECT_PTR_EQ_MSG => KUNIT_BINARY_PTR_ASSERTION => KUNIT_BASE_BINARY_ASSERTION Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 173 +++++++++++++++------------------------------------ 1 file changed, 49 insertions(+), 124 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 48cf520b69ce..bf82c313223b 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -888,48 +888,6 @@ do { \ ##__VA_ARGS__); \ } while (0) -#define KUNIT_BASE_EQ_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, ==, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BASE_NE_MSG_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) 
\ - KUNIT_BASE_BINARY_ASSERTION(test, \ - assert_class, \ - ASSERT_CLASS_INIT, \ - assert_type, \ - left, !=, right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_EQ_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_EQ_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - #define KUNIT_BINARY_INT_ASSERTION(test, \ assert_type, \ left, \ @@ -945,43 +903,18 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_EQ_MSG_ASSERTION(test, \ - kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_NE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ - KUNIT_BASE_NE_MSG_ASSERTION(test, \ - kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) - -#define KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ - assert_type, \ - left, \ - right, \ - fmt, \ - ...) \ - KUNIT_BASE_NE_MSG_ASSERTION(test, \ +#define KUNIT_BINARY_PTR_ASSERTION(test, \ + assert_type, \ + left, \ + op, \ + right, \ + fmt, \ + ...) \ + KUNIT_BASE_BINARY_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ - left, \ - right, \ + left, op, right, \ fmt, \ ##__VA_ARGS__) @@ -1082,12 +1015,11 @@ do { \ KUNIT_EXPECT_EQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_EQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_PTR_EQ() - Expects that pointers @left and @right are equal. @@ -1104,12 +1036,11 @@ do { \ KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, NULL) #define KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_PTR_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_NE() - An expectation that @left and @right are not equal. @@ -1126,12 +1057,11 @@ do { \ KUNIT_EXPECT_NE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_NE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_NE_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_PTR_NE() - Expects that pointers @left and @right are not equal. @@ -1148,12 +1078,11 @@ do { \ KUNIT_EXPECT_PTR_NE_MSG(test, left, right, NULL) #define KUNIT_EXPECT_PTR_NE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ - KUNIT_EXPECTATION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_PTR_ASSERTION(test, \ + KUNIT_EXPECTATION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_EXPECT_LT() - An expectation that @left is less than @right. @@ -1358,12 +1287,11 @@ do { \ KUNIT_ASSERT_EQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_EQ_MSG(test, left, right, fmt, ...) 
\ - KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_PTR_EQ() - Asserts that pointers @left and @right are equal. @@ -1379,12 +1307,11 @@ do { \ KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, NULL) #define KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_PTR_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, ==, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_NE() - An assertion that @left and @right are not equal. @@ -1400,12 +1327,11 @@ do { \ KUNIT_ASSERT_NE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_NE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_NE_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_INT_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_PTR_NE() - Asserts that pointers @left and @right are not equal. @@ -1422,12 +1348,11 @@ do { \ KUNIT_ASSERT_PTR_NE_MSG(test, left, right, NULL) #define KUNIT_ASSERT_PTR_NE_MSG(test, left, right, fmt, ...) \ - KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ - KUNIT_ASSERTION, \ - left, \ - right, \ - fmt, \ - ##__VA_ARGS__) + KUNIT_BINARY_PTR_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, !=, right, \ + fmt, \ + ##__VA_ARGS__) /** * KUNIT_ASSERT_LT() - An assertion that @left is less than @right. * @test: The test context object. -- cgit v1.2.3 From acb13ea0baf8db8d05a3910c06e997c90825faad Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 13 Jan 2022 16:53:56 +0100 Subject: asm-generic/bitops: Always inline all bit manipulation helpers Make it consistent with the atomic/atomic-instrumented.h helpers. And defconfig size is actually going down! text data bss dec hex filename 22352096 8213152 1917164 32482412 1efa46c vmlinux.x86-64.defconfig.before 22350551 8213184 1917164 32480899 1ef9e83 vmlinux.x86-64.defconfig.after Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220113155357.4706-2-bp@alien8.de --- include/asm-generic/bitops/instrumented-atomic.h | 12 ++++++------ include/asm-generic/bitops/instrumented-non-atomic.h | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index c90192b1c755..4225a8ca9c1a 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -23,7 +23,7 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(long nr, volatile unsigned long *addr) +static __always_inline void set_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_set_bit(nr, addr); @@ -36,7 +36,7 @@ static inline void set_bit(long nr, volatile unsigned long *addr) * * This is a relaxed atomic operation (no implied memory barriers). 
*/ -static inline void clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void clear_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_clear_bit(nr, addr); @@ -52,7 +52,7 @@ static inline void clear_bit(long nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void change_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_change_bit(nr, addr); @@ -65,7 +65,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); @@ -79,7 +79,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); @@ -93,7 +93,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) * * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index 37363d570b9b..7ab1ecc37782 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -22,7 +22,7 @@ * region of memory concurrently, the effect may be that only one operation * succeeds. */ -static inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___set_bit(nr, addr); @@ -37,7 +37,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr) * region of memory concurrently, the effect may be that only one operation * succeeds. */ -static inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit(nr, addr); @@ -52,13 +52,13 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr) * region of memory concurrently, the effect may be that only one operation * succeeds. 
*/ -static inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___change_bit(nr, addr); } -static inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr) +static __always_inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr) { if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) { /* @@ -90,7 +90,7 @@ static inline void __instrument_read_write_bitop(long nr, volatile unsigned long * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_set_bit(nr, addr); @@ -104,7 +104,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_clear_bit(nr, addr); @@ -118,7 +118,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) * This operation is non-atomic. If two instances of this operation race, one * can appear to succeed but actually fail. */ -static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_change_bit(nr, addr); @@ -129,7 +129,7 @@ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) * @nr: bit number to test * @addr: Address to start counting from */ -static inline bool test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool test_bit(long nr, const volatile unsigned long *addr) { instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); return arch_test_bit(nr, addr); -- cgit v1.2.3 From 1dc01abad6544cb9d884071b626b706e37aa9601 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 13 Jan 2022 16:53:57 +0100 Subject: cpumask: Always inline helpers which use bit manipulation functions Former are always inlined so do that for the latter too, for consistency. Size impact is a whopping 5 bytes increase! 
:-) text data bss dec hex filename 22350551 8213184 1917164 32480899 1ef9e83 vmlinux.x86-64.defconfig.before 22350556 8213152 1917164 32480872 1ef9e68 vmlinux.x86-64.defconfig.after Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220113155357.4706-3-bp@alien8.de --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 64dae70d31f5..6b06c698cd2a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -341,12 +341,12 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -357,12 +357,12 @@ static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -374,7 +374,7 @@ static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * * Returns 1 if @cpu is set in @cpumask, else returns 0 */ -static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -388,7 +388,7 @@ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) * * test_and_set_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -402,7 +402,7 @@ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) * * test_and_clear_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } -- cgit v1.2.3 From 68643c3735102177139237fd6656085eac403b69 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 25 Jan 2022 10:33:29 +0100 Subject: dt-bindings: clock: cs2000-cp: document aux-output-source This new optional property can be used to control the function of the auxiliary output pin. Introduce a new dt-bindings include file that contains the numerical values. 
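For a rough idea of how a consumer could interpret the new property, a driver-side sketch might look like the following. This is purely illustrative: only the "aux-output-source" property name and the CS2000CP_AUX_OUTPUT_* constants come from this patch, while the helper name and the fallback default are assumptions.

    #include <linux/of.h>
    #include <dt-bindings/clock/cirrus,cs2000-cp.h>

    /* Hypothetical helper (not part of this patch): read the requested
     * AUX pin function from the device tree and validate it.
     */
    static int cs2000_get_aux_source(struct device_node *np, u32 *src)
    {
            *src = CS2000CP_AUX_OUTPUT_REF_CLK;   /* assumed default when absent */
            of_property_read_u32(np, "aux-output-source", src);

            switch (*src) {
            case CS2000CP_AUX_OUTPUT_REF_CLK:
            case CS2000CP_AUX_OUTPUT_CLK_IN:
            case CS2000CP_AUX_OUTPUT_CLK_OUT:
            case CS2000CP_AUX_OUTPUT_PLL_LOCK:
                    return 0;
            default:
                    return -EINVAL;
            }
    }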
Signed-off-by: Daniel Mack Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220125093336.226787-3-daniel@zonque.org Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/cirrus,cs2000-cp.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/clock/cirrus,cs2000-cp.h (limited to 'include') diff --git a/include/dt-bindings/clock/cirrus,cs2000-cp.h b/include/dt-bindings/clock/cirrus,cs2000-cp.h new file mode 100644 index 000000000000..fe3ac71750a8 --- /dev/null +++ b/include/dt-bindings/clock/cirrus,cs2000-cp.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2021 Daniel Mack + */ + +#ifndef __DT_BINDINGS_CS2000CP_CLK_H +#define __DT_BINDINGS_CS2000CP_CLK_H + +#define CS2000CP_AUX_OUTPUT_REF_CLK 0 +#define CS2000CP_AUX_OUTPUT_CLK_IN 1 +#define CS2000CP_AUX_OUTPUT_CLK_OUT 2 +#define CS2000CP_AUX_OUTPUT_PLL_LOCK 3 + +#endif /* __DT_BINDINGS_CS2000CP_CLK_H */ -- cgit v1.2.3 From c1aca3080e382886e2e58e809787441984a2f89b Mon Sep 17 00:00:00 2001 From: Yan Yan Date: Tue, 18 Jan 2022 16:00:13 -0800 Subject: xfrm: Check if_id in xfrm_migrate This patch enables distinguishing SAs and SPs based on if_id during the xfrm_migrate flow. This ensures support for xfrm interfaces throughout the SA/SP lifecycle. When there are multiple existing SPs with the same direction, the same xfrm_selector and different endpoint addresses, xfrm_migrate might fail with ENODATA. Specifically, the code path for performing xfrm_migrate is: Stage 1: find policy to migrate with xfrm_migrate_policy_find(sel, dir, type, net) Stage 2: find and update state(s) with xfrm_migrate_state_find(mp, net) Stage 3: update endpoint address(es) of template(s) with xfrm_policy_migrate(pol, m, num_migrate) Currently "Stage 1" always returns the first xfrm_policy that matches, and "Stage 3" looks for the xfrm_tmpl that matches the old endpoint address. Thus if there are multiple xfrm_policy with same selector, direction, type and net, "Stage 1" might rertun a wrong xfrm_policy and "Stage 3" will fail with ENODATA because it cannot find a xfrm_tmpl with the matching endpoint address. The fix is to allow userspace to pass an if_id and add if_id to the matching rule in Stage 1 and Stage 2 since if_id is a unique ID for xfrm_policy and xfrm_state. For compatibility, if_id will only be checked if the attribute is set. 
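The matching rule added in Stage 1 and Stage 2 reduces to a small guard of the following shape (an illustrative sketch; the helper name is invented, the semantics follow the description above):

    #include <linux/types.h>

    /* Sketch only: if_id == 0 means the attribute was not set by userspace,
     * so the old matching behaviour is kept; otherwise require an exact match.
     */
    static bool xfrm_migrate_if_id_ok(u32 requested_if_id, u32 obj_if_id)
    {
            return !requested_if_id || requested_if_id == obj_if_id;
    }

Both the policy lookup in Stage 1 and xfrm_migrate_state_find() in Stage 2 would apply such a check against the object's if_id, which is why the prototypes below gain a u32 if_id parameter.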
Tested with additions to Android's kernel unit test suite: https://android-review.googlesource.com/c/kernel/tests/+/1668886 Signed-off-by: Yan Yan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fdb41e8bb626..743dd1da506e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1681,14 +1681,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -- cgit v1.2.3 From d6dc675377261472adda696da456b4ebcc5bb9d9 Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Mon, 24 Jan 2022 19:46:30 +0530 Subject: dt-bindings: clock: Add bindings definitions for FSD CMU blocks Clock controller driver of FSD platform is designed to have separate instances for each particular CMU. So clock IDs in this bindings header also start from 1 for each CMU block. Cc: linux-fsd@tesla.com Reported-by: kernel test robot [robot: reported missing #endif] Signed-off-by: Pankaj Dubey Signed-off-by: Alim Akhtar Reviewed-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Acked-by: Sylwester Nawrocki Link: https://lore.kernel.org/r/20220124141644.71052-3-alim.akhtar@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/fsd-clk.h | 150 ++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 include/dt-bindings/clock/fsd-clk.h (limited to 'include') diff --git a/include/dt-bindings/clock/fsd-clk.h b/include/dt-bindings/clock/fsd-clk.h new file mode 100644 index 000000000000..c8a2af1dd1ad --- /dev/null +++ b/include/dt-bindings/clock/fsd-clk.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017 - 2022: Samsung Electronics Co., Ltd. + * https://www.samsung.com + * Copyright (c) 2017-2022 Tesla, Inc. + * https://www.tesla.com + * + * The constants defined in this header are being used in dts + * and fsd platform driver. 
+ */ + +#ifndef _DT_BINDINGS_CLOCK_FSD_H +#define _DT_BINDINGS_CLOCK_FSD_H + +/* CMU */ +#define DOUT_CMU_PLL_SHARED0_DIV4 1 +#define DOUT_CMU_PERIC_SHARED1DIV36 2 +#define DOUT_CMU_PERIC_SHARED0DIV3_TBUCLK 3 +#define DOUT_CMU_PERIC_SHARED0DIV20 4 +#define DOUT_CMU_PERIC_SHARED1DIV4_DMACLK 5 +#define DOUT_CMU_PLL_SHARED0_DIV6 6 +#define DOUT_CMU_FSYS0_SHARED1DIV4 7 +#define DOUT_CMU_FSYS0_SHARED0DIV4 8 +#define DOUT_CMU_FSYS1_SHARED0DIV8 9 +#define DOUT_CMU_FSYS1_SHARED0DIV4 10 +#define CMU_CPUCL_SWITCH_GATE 11 +#define DOUT_CMU_IMEM_TCUCLK 12 +#define DOUT_CMU_IMEM_ACLK 13 +#define DOUT_CMU_IMEM_DMACLK 14 +#define GAT_CMU_FSYS0_SHARED0DIV4 15 +#define CMU_NR_CLK 16 + +/* PERIC */ +#define PERIC_SCLK_UART0 1 +#define PERIC_PCLK_UART0 2 +#define PERIC_SCLK_UART1 3 +#define PERIC_PCLK_UART1 4 +#define PERIC_DMA0_IPCLKPORT_ACLK 5 +#define PERIC_DMA1_IPCLKPORT_ACLK 6 +#define PERIC_PWM0_IPCLKPORT_I_PCLK_S0 7 +#define PERIC_PWM1_IPCLKPORT_I_PCLK_S0 8 +#define PERIC_PCLK_SPI0 9 +#define PERIC_SCLK_SPI0 10 +#define PERIC_PCLK_SPI1 11 +#define PERIC_SCLK_SPI1 12 +#define PERIC_PCLK_SPI2 13 +#define PERIC_SCLK_SPI2 14 +#define PERIC_PCLK_TDM0 15 +#define PERIC_PCLK_HSI2C0 16 +#define PERIC_PCLK_HSI2C1 17 +#define PERIC_PCLK_HSI2C2 18 +#define PERIC_PCLK_HSI2C3 19 +#define PERIC_PCLK_HSI2C4 20 +#define PERIC_PCLK_HSI2C5 21 +#define PERIC_PCLK_HSI2C6 22 +#define PERIC_PCLK_HSI2C7 23 +#define PERIC_MCAN0_IPCLKPORT_CCLK 24 +#define PERIC_MCAN0_IPCLKPORT_PCLK 25 +#define PERIC_MCAN1_IPCLKPORT_CCLK 26 +#define PERIC_MCAN1_IPCLKPORT_PCLK 27 +#define PERIC_MCAN2_IPCLKPORT_CCLK 28 +#define PERIC_MCAN2_IPCLKPORT_PCLK 29 +#define PERIC_MCAN3_IPCLKPORT_CCLK 30 +#define PERIC_MCAN3_IPCLKPORT_PCLK 31 +#define PERIC_PCLK_ADCIF 32 +#define PERIC_EQOS_TOP_IPCLKPORT_CLK_PTP_REF_I 33 +#define PERIC_EQOS_TOP_IPCLKPORT_ACLK_I 34 +#define PERIC_EQOS_TOP_IPCLKPORT_HCLK_I 35 +#define PERIC_EQOS_TOP_IPCLKPORT_RGMII_CLK_I 36 +#define PERIC_EQOS_TOP_IPCLKPORT_CLK_RX_I 37 +#define PERIC_BUS_D_PERIC_IPCLKPORT_EQOSCLK 38 +#define PERIC_BUS_P_PERIC_IPCLKPORT_EQOSCLK 39 +#define PERIC_HCLK_TDM0 40 +#define PERIC_PCLK_TDM1 41 +#define PERIC_HCLK_TDM1 42 +#define PERIC_EQOS_PHYRXCLK_MUX 43 +#define PERIC_EQOS_PHYRXCLK 44 +#define PERIC_DOUT_RGMII_CLK 45 +#define PERIC_NR_CLK 46 + +/* FSYS0 */ +#define UFS0_MPHY_REFCLK_IXTAL24 1 +#define UFS0_MPHY_REFCLK_IXTAL26 2 +#define UFS1_MPHY_REFCLK_IXTAL24 3 +#define UFS1_MPHY_REFCLK_IXTAL26 4 +#define UFS0_TOP0_HCLK_BUS 5 +#define UFS0_TOP0_ACLK 6 +#define UFS0_TOP0_CLK_UNIPRO 7 +#define UFS0_TOP0_FMP_CLK 8 +#define UFS1_TOP1_HCLK_BUS 9 +#define UFS1_TOP1_ACLK 10 +#define UFS1_TOP1_CLK_UNIPRO 11 +#define UFS1_TOP1_FMP_CLK 12 +#define PCIE_SUBCTRL_INST0_DBI_ACLK_SOC 13 +#define PCIE_SUBCTRL_INST0_AUX_CLK_SOC 14 +#define PCIE_SUBCTRL_INST0_MSTR_ACLK_SOC 15 +#define PCIE_SUBCTRL_INST0_SLV_ACLK_SOC 16 +#define FSYS0_EQOS_TOP0_IPCLKPORT_CLK_PTP_REF_I 17 +#define FSYS0_EQOS_TOP0_IPCLKPORT_ACLK_I 18 +#define FSYS0_EQOS_TOP0_IPCLKPORT_HCLK_I 19 +#define FSYS0_EQOS_TOP0_IPCLKPORT_RGMII_CLK_I 20 +#define FSYS0_EQOS_TOP0_IPCLKPORT_CLK_RX_I 21 +#define FSYS0_DOUT_FSYS0_PERIBUS_GRP 22 +#define FSYS0_NR_CLK 23 + +/* FSYS1 */ +#define PCIE_LINK0_IPCLKPORT_DBI_ACLK 1 +#define PCIE_LINK0_IPCLKPORT_AUX_ACLK 2 +#define PCIE_LINK0_IPCLKPORT_MSTR_ACLK 3 +#define PCIE_LINK0_IPCLKPORT_SLV_ACLK 4 +#define PCIE_LINK1_IPCLKPORT_DBI_ACLK 5 +#define PCIE_LINK1_IPCLKPORT_AUX_ACLK 6 +#define PCIE_LINK1_IPCLKPORT_MSTR_ACLK 7 +#define PCIE_LINK1_IPCLKPORT_SLV_ACLK 8 +#define FSYS1_NR_CLK 9 + +/* IMEM */ +#define 
IMEM_DMA0_IPCLKPORT_ACLK 1 +#define IMEM_DMA1_IPCLKPORT_ACLK 2 +#define IMEM_WDT0_IPCLKPORT_PCLK 3 +#define IMEM_WDT1_IPCLKPORT_PCLK 4 +#define IMEM_WDT2_IPCLKPORT_PCLK 5 +#define IMEM_MCT_PCLK 6 +#define IMEM_TMU_CPU0_IPCLKPORT_I_CLK_TS 7 +#define IMEM_TMU_CPU2_IPCLKPORT_I_CLK_TS 8 +#define IMEM_TMU_TOP_IPCLKPORT_I_CLK_TS 9 +#define IMEM_TMU_GPU_IPCLKPORT_I_CLK_TS 10 +#define IMEM_TMU_GT_IPCLKPORT_I_CLK_TS 11 +#define IMEM_NR_CLK 12 + +/* MFC */ +#define MFC_MFC_IPCLKPORT_ACLK 1 +#define MFC_NR_CLK 2 + +/* CAM_CSI */ +#define CAM_CSI0_0_IPCLKPORT_I_ACLK 1 +#define CAM_CSI0_1_IPCLKPORT_I_ACLK 2 +#define CAM_CSI0_2_IPCLKPORT_I_ACLK 3 +#define CAM_CSI0_3_IPCLKPORT_I_ACLK 4 +#define CAM_CSI1_0_IPCLKPORT_I_ACLK 5 +#define CAM_CSI1_1_IPCLKPORT_I_ACLK 6 +#define CAM_CSI1_2_IPCLKPORT_I_ACLK 7 +#define CAM_CSI1_3_IPCLKPORT_I_ACLK 8 +#define CAM_CSI2_0_IPCLKPORT_I_ACLK 9 +#define CAM_CSI2_1_IPCLKPORT_I_ACLK 10 +#define CAM_CSI2_2_IPCLKPORT_I_ACLK 11 +#define CAM_CSI2_3_IPCLKPORT_I_ACLK 12 +#define CAM_CSI_NR_CLK 13 + +#endif /*_DT_BINDINGS_CLOCK_FSD_H */ -- cgit v1.2.3 From be6ec5b7026620b931e0fa9287d24ad2cd2ab9b6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 26 Jan 2022 10:25:55 +0000 Subject: net: xpcs: add support for retrieving supported interface modes Add a function to the xpcs driver to retrieve the supported PHY interface modes, which can be used by drivers to fill in phylink's supported_interfaces mask. We validate the interface bit index to ensure that it fits within the bitmap as xpcs lists PHY_INTERFACE_MODE_MAX in an entry. Tested-by: Wong Vee Khee # Intel EHL Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index add077a81b21..3126a4924d92 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -33,6 +33,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, unsigned int mode); void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported, struct phylink_link_state *state); +void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, -- cgit v1.2.3 From fe70fb74b56407c1a5be347258082f8abbe7956d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 26 Jan 2022 10:26:10 +0000 Subject: net: stmmac/xpcs: convert to pcs_validate() stmmac explicitly calls the xpcs driver to validate the ethtool linkmodes. This is no longer necessary as phylink now supports validation through a PCS method. Convert both drivers to use this new mechanism. Tested-by: Wong Vee Khee # Intel EHL Signed-off-by: Russell King (Oracle) Signed-off-by: David S. 
Miller --- include/linux/pcs/pcs-xpcs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 3126a4924d92..266eb26fb029 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -31,8 +31,6 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, unsigned int mode); -void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported, - struct phylink_link_state *state); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -- cgit v1.2.3 From f383b0770612838e78986231710c0a3afee4db42 Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Mon, 10 Jan 2022 12:49:27 +0100 Subject: dt-bindings: reset: add dt binding header for Mediatek MT7621 resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dt binding header for resets lines in Mediatek MT7621 SoCs. Acked-by: Rob Herring Tested-by: Arınç ÜNAL Signed-off-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20220110114930.1406665-2-sergio.paracuellos@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/dt-bindings/reset/mt7621-reset.h | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/dt-bindings/reset/mt7621-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/mt7621-reset.h b/include/dt-bindings/reset/mt7621-reset.h new file mode 100644 index 000000000000..7572c6b41453 --- /dev/null +++ b/include/dt-bindings/reset/mt7621-reset.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 Sergio Paracuellos + * Author: Sergio Paracuellos + */ + +#ifndef DT_BINDING_MT7621_RESET_H +#define DT_BINDING_MT7621_RESET_H + +#define MT7621_RST_SYS 0 +#define MT7621_RST_MCM 2 +#define MT7621_RST_HSDMA 5 +#define MT7621_RST_FE 6 +#define MT7621_RST_SPDIFTX 7 +#define MT7621_RST_TIMER 8 +#define MT7621_RST_INT 9 +#define MT7621_RST_MC 10 +#define MT7621_RST_PCM 11 +#define MT7621_RST_PIO 13 +#define MT7621_RST_GDMA 14 +#define MT7621_RST_NFI 15 +#define MT7621_RST_I2C 16 +#define MT7621_RST_I2S 17 +#define MT7621_RST_SPI 18 +#define MT7621_RST_UART1 19 +#define MT7621_RST_UART2 20 +#define MT7621_RST_UART3 21 +#define MT7621_RST_ETH 23 +#define MT7621_RST_PCIE0 24 +#define MT7621_RST_PCIE1 25 +#define MT7621_RST_PCIE2 26 +#define MT7621_RST_AUX_STCK 28 +#define MT7621_RST_CRYPTO 29 +#define MT7621_RST_SDXC 30 +#define MT7621_RST_PPE 31 + +#endif /* DT_BINDING_MT7621_RESET_H */ -- cgit v1.2.3 From 4e2a44c1408b6a6a46122704511234f68cf012b8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Jan 2022 08:14:22 +0100 Subject: tty: add kfifo to tty_port Define a kfifo inside struct tty_port. We use DECLARE_KFIFO_PTR and let the preexisting tty_port::xmit_buf be also the buffer for the kfifo. And handle the initialization/decomissioning along with xmit_buf, i.e. in tty_port_alloc_xmit_buf() and tty_port_free_xmit_buf(), respectively. This allows for kfifo use in drivers which opt-in, while others still may use the old xmit_buf. mxser will be the first user in the next few patches. 
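As a rough illustration of the opt-in usage (hypothetical code, not taken from mxser), a driver that has called tty_port_alloc_xmit_buf() could queue transmit data through the new fifo like this:

    #include <linux/kfifo.h>
    #include <linux/tty_port.h>

    /* Illustrative only: push data into the port's xmit kfifo; the return
     * value is the number of bytes actually queued (the fifo may be full).
     */
    static unsigned int example_queue_tx(struct tty_port *port,
                                         const unsigned char *buf, size_t count)
    {
            return kfifo_in(&port->xmit_fifo, buf, count);
    }

The transmit path would then drain the fifo into the hardware with kfifo_out() or one of its variants.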
Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220124071430.14907-4-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index d3ea9ed0b98e..58e9619116b7 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -2,6 +2,7 @@ #ifndef _LINUX_TTY_PORT_H #define _LINUX_TTY_PORT_H +#include #include #include #include @@ -67,6 +68,7 @@ extern const struct tty_port_client_operations tty_port_default_client_ops; * @mutex: locking, for open, shutdown and other port operations * @buf_mutex: @xmit_buf alloc lock * @xmit_buf: optional xmit buffer used by some drivers + * @xmit_fifo: optional xmit buffer used by some drivers * @close_delay: delay in jiffies to wait when closing the port * @closing_wait: delay in jiffies for output to be sent before closing * @drain_delay: set to zero if no pure time based drain is needed else set to @@ -110,6 +112,7 @@ struct tty_port { struct mutex mutex; struct mutex buf_mutex; unsigned char *xmit_buf; + DECLARE_KFIFO_PTR(xmit_fifo, unsigned char); unsigned int close_delay; unsigned int closing_wait; int drain_delay; -- cgit v1.2.3 From ebb7fb1557b1d03b906b668aa2164b51e6b7d19a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 26 Jan 2022 09:19:20 -0800 Subject: xfs, iomap: limit individual ioend chain lengths in writeback Trond Myklebust reported soft lockups in XFS IO completion such as this: watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [kworker/12:1:3106] CPU: 12 PID: 3106 Comm: kworker/12:1 Not tainted 4.18.0-305.10.2.el8_4.x86_64 #1 Workqueue: xfs-conv/md127 xfs_end_io [xfs] RIP: 0010:_raw_spin_unlock_irqrestore+0x11/0x20 Call Trace: wake_up_page_bit+0x8a/0x110 iomap_finish_ioend+0xd7/0x1c0 iomap_finish_ioends+0x7f/0xb0 xfs_end_ioend+0x6b/0x100 [xfs] xfs_end_io+0xb9/0xe0 [xfs] process_one_work+0x1a7/0x360 worker_thread+0x1fa/0x390 kthread+0x116/0x130 ret_from_fork+0x35/0x40 Ioends are processed as an atomic completion unit when all the chained bios in the ioend have completed their IO. Logically contiguous ioends can also be merged and completed as a single, larger unit. Both of these things can be problematic as both the bio chains per ioend and the size of the merged ioends processed as a single completion are both unbound. If we have a large sequential dirty region in the page cache, write_cache_pages() will keep feeding us sequential pages and we will keep mapping them into ioends and bios until we get a dirty page at a non-sequential file offset. These large sequential runs can will result in bio and ioend chaining to optimise the io patterns. The pages iunder writeback are pinned within these chains until the submission chaining is broken, allowing the entire chain to be completed. This can result in huge chains being processed in IO completion context. We get deep bio chaining if we have large contiguous physical extents. We will keep adding pages to the current bio until it is full, then we'll chain a new bio to keep adding pages for writeback. Hence we can build bio chains that map millions of pages and tens of gigabytes of RAM if the page cache contains big enough contiguous dirty file regions. This long bio chain pins those pages until the final bio in the chain completes and the ioend can iterate all the chained bios and complete them. 
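For a sense of scale (illustrative numbers only): with 4 KiB pages, a single contiguous 4 GiB dirty range is roughly a million pages (4 GiB / 4 KiB = 1,048,576) held behind one such chain until its final bio completes.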
OTOH, if we have a physically fragmented file, we end up submitting one ioend per physical fragment that each have a small bio or bio chain attached to them. We do not chain these at IO submission time, but instead we chain them at completion time based on file offset via iomap_ioend_try_merge(). Hence we can end up with unbound ioend chains being built via completion merging. XFS can then do COW remapping or unwritten extent conversion on that merged chain, which involves walking an extent fragment at a time and running a transaction to modify the physical extent information. IOWs, we merge all the discontiguous ioends together into a contiguous file range, only to then process them individually as discontiguous extents. This extent manipulation is computationally expensive and can run in a tight loop, so merging logically contiguous but physically discontigous ioends gains us nothing except for hiding the fact the fact we broke the ioends up into individual physical extents at submission and then need to loop over those individual physical extents at completion. Hence we need to have mechanisms to limit ioend sizes and to break up completion processing of large merged ioend chains: 1. bio chains per ioend need to be bound in length. Pure overwrites go straight to iomap_finish_ioend() in softirq context with the exact bio chain attached to the ioend by submission. Hence the only way to prevent long holdoffs here is to bound ioend submission sizes because we can't reschedule in softirq context. 2. iomap_finish_ioends() has to handle unbound merged ioend chains correctly. This relies on any one call to iomap_finish_ioend() being bound in runtime so that cond_resched() can be issued regularly as the long ioend chain is processed. i.e. this relies on mechanism #1 to limit individual ioend sizes to work correctly. 3. filesystems have to loop over the merged ioends to process physical extent manipulations. This means they can loop internally, and so we break merging at physical extent boundaries so the filesystem can easily insert reschedule points between individual extent manipulations. Signed-off-by: Dave Chinner Reported-and-tested-by: Trond Myklebust Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b55bd49e55f5..97a3a2edb585 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -263,9 +263,11 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ + u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ + sector_t io_sector; /* start sector of ioend */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! */ }; -- cgit v1.2.3 From 597568e8df046ebf349c706b281a711297ab20fb Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 25 Jan 2022 13:50:07 +0800 Subject: misc: rtsx: Rework runtime power management flow Commit 5b4258f6721f ("misc: rtsx: rts5249 support runtime PM") uses "rtd3_work" and "idle_work" to manage it's own runtime PM state machine. When its child device, rtsx_pci_sdmmc, uses runtime PM refcount correctly, all the additional works can be managed by generic runtime PM helpers. 
So consolidate "idle_work" and "rtd3_work" into generic runtime idle callback and runtime suspend callback, respectively. Fixes: 5b4258f6721f ("misc: rtsx: rts5249 support runtime PM") Cc: Ricky WU Tested-by: Ricky WU Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220125055010.1866563-2-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 4ab7bfc675f1..89b7d34e25b6 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1201,8 +1201,6 @@ struct rtsx_pcr { unsigned int card_exist; struct delayed_work carddet_work; - struct delayed_work idle_work; - struct delayed_work rtd3_work; spinlock_t lock; struct mutex pcr_mutex; @@ -1212,7 +1210,6 @@ struct rtsx_pcr { unsigned int cur_clock; bool remove_pci; bool msi_en; - bool is_runtime_suspended; #define EXTRA_CAPS_SD_SDR50 (1 << 0) #define EXTRA_CAPS_SD_SDR104 (1 << 1) -- cgit v1.2.3 From 71732e24609b5a7af96efc89aebde55f76c1de3e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 25 Jan 2022 13:50:09 +0800 Subject: misc: rtsx: Quiesce rts5249 on system suspend Set more registers in force_power_down callback to avoid S3 wakeup from hotplugging cards. This is originally written by Ricky WU. Link: https://lore.kernel.org/lkml/c4525b4738f94483b9b8f8571fc80646@realtek.com/ Cc: Ricky WU Tested-by: Ricky WU Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220125055010.1866563-4-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 89b7d34e25b6..3d780b44e678 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1095,7 +1095,7 @@ struct pcr_ops { unsigned int (*cd_deglitch)(struct rtsx_pcr *pcr); int (*conv_clk_and_div_n)(int clk, int dir); void (*fetch_vendor_settings)(struct rtsx_pcr *pcr); - void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state); + void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state, bool runtime); void (*stop_cmd)(struct rtsx_pcr *pcr); void (*set_aspm)(struct rtsx_pcr *pcr, bool enable); -- cgit v1.2.3 From fbb8295248e1d6f576d444309fcf79356008eac1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2022 10:07:14 -0800 Subject: tcp: allocate tcp_death_row outside of struct netns_ipv4 I forgot tcp had per netns tracking of timewait sockets, and their sysctl to change the limit. After 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()"), whole struct net can be freed before last tw socket is freed. We need to allocate a separate struct inet_timewait_death_row object per netns. tw_count becomes a refcount and gains associated debugging infrastructure. 
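In outline, the per-netns setup implied by this change looks roughly like the sketch below. It is illustrative only: tw_refcount, hashinfo and the tcp_death_row pointer appear in the diff further down, but the function name and its placement are assumptions.

    #include <linux/slab.h>
    #include <net/tcp.h>

    /* Sketch: allocate the death row at netns init and take the initial ref. */
    static int example_tcp_death_row_init(struct net *net)
    {
            struct inet_timewait_death_row *row;

            row = kzalloc(sizeof(*row), GFP_KERNEL);
            if (!row)
                    return -ENOMEM;

            refcount_set(&row->tw_refcount, 1);
            row->hashinfo = &tcp_hashinfo;
            net->ipv4.tcp_death_row = row;
            return 0;
    }

Timewait sockets then hold references on this separately allocated object rather than bumping a bare counter, so it can safely outlive the struct net that created it.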
BUG: KASAN: use-after-free in inet_twsk_kill+0x358/0x3c0 net/ipv4/inet_timewait_sock.c:46 Read of size 8 at addr ffff88807d5f9f40 by task kworker/1:7/3690 CPU: 1 PID: 3690 Comm: kworker/1:7 Not tainted 5.16.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events pwq_unbound_release_workfn Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 inet_twsk_kill+0x358/0x3c0 net/ipv4/inet_timewait_sock.c:46 call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421 expire_timers kernel/time/timer.c:1466 [inline] __run_timers.part.0+0x67c/0xa30 kernel/time/timer.c:1734 __run_timers kernel/time/timer.c:1715 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1747 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 RIP: 0010:lockdep_unregister_key+0x1c9/0x250 kernel/locking/lockdep.c:6328 Code: 00 00 00 48 89 ee e8 46 fd ff ff 4c 89 f7 e8 5e c9 ff ff e8 09 cc ff ff 9c 58 f6 c4 02 75 26 41 f7 c4 00 02 00 00 74 01 fb 5b <5d> 41 5c 41 5d 41 5e 41 5f e9 19 4a 08 00 0f 0b 5b 5d 41 5c 41 5d RSP: 0018:ffffc90004077cb8 EFLAGS: 00000206 RAX: 0000000000000046 RBX: ffff88807b61b498 RCX: 0000000000000001 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff888077027128 R08: 0000000000000001 R09: ffffffff8f1ea4fc R10: fffffbfff1ff93ee R11: 000000000000af1e R12: 0000000000000246 R13: 0000000000000000 R14: ffffffff8ffc89b8 R15: ffffffff90157fb0 wq_unregister_lockdep kernel/workqueue.c:3508 [inline] pwq_unbound_release_workfn+0x254/0x340 kernel/workqueue.c:3746 process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307 worker_thread+0x657/0x1110 kernel/workqueue.c:2454 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Allocated by task 3635: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:437 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:470 kasan_slab_alloc include/linux/kasan.h:260 [inline] slab_post_alloc_hook mm/slab.h:732 [inline] slab_alloc_node mm/slub.c:3230 [inline] slab_alloc mm/slub.c:3238 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3243 kmem_cache_zalloc include/linux/slab.h:705 [inline] net_alloc net/core/net_namespace.c:407 [inline] copy_net_ns+0x125/0x760 net/core/net_namespace.c:462 create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc1/0x1f0 kernel/nsproxy.c:226 ksys_unshare+0x445/0x920 kernel/fork.c:3048 __do_sys_unshare kernel/fork.c:3119 [inline] __se_sys_unshare kernel/fork.c:3117 [inline] __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3117 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807d5f9a80 which belongs to the cache net_namespace of size 6528 The buggy address is located 1216 bytes inside of 6528-byte region [ffff88807d5f9a80, ffff88807d5fb400) The buggy address belongs to the page: page:ffffea0001f57e00 
refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88807d5f9a80 pfn:0x7d5f8 head:ffffea0001f57e00 order:3 compound_mapcount:0 compound_pincount:0 memcg:ffff888070023001 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 ffff888010dd4f48 ffffea0001404e08 ffff8880118fd000 raw: ffff88807d5f9a80 0000000000040002 00000001ffffffff ffff888070023001 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3634, ts 119694798460, free_ts 119693556950 prep_new_page mm/page_alloc.c:2434 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389 alloc_pages+0x1aa/0x310 mm/mempolicy.c:2271 alloc_slab_page mm/slub.c:1799 [inline] allocate_slab mm/slub.c:1944 [inline] new_slab+0x28a/0x3b0 mm/slub.c:2004 ___slab_alloc+0x87c/0xe90 mm/slub.c:3018 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3105 slab_alloc_node mm/slub.c:3196 [inline] slab_alloc mm/slub.c:3238 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3243 kmem_cache_zalloc include/linux/slab.h:705 [inline] net_alloc net/core/net_namespace.c:407 [inline] copy_net_ns+0x125/0x760 net/core/net_namespace.c:462 create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc1/0x1f0 kernel/nsproxy.c:226 ksys_unshare+0x445/0x920 kernel/fork.c:3048 __do_sys_unshare kernel/fork.c:3119 [inline] __se_sys_unshare kernel/fork.c:3117 [inline] __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3117 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1352 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404 free_unref_page_prepare mm/page_alloc.c:3325 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3404 skb_free_head net/core/skbuff.c:655 [inline] skb_release_data+0x65d/0x790 net/core/skbuff.c:677 skb_release_all net/core/skbuff.c:742 [inline] __kfree_skb net/core/skbuff.c:756 [inline] consume_skb net/core/skbuff.c:914 [inline] consume_skb+0xc2/0x160 net/core/skbuff.c:908 skb_free_datagram+0x1b/0x1f0 net/core/datagram.c:325 netlink_recvmsg+0x636/0xea0 net/netlink/af_netlink.c:1998 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] sock_recvmsg net/socket.c:962 [inline] ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632 ___sys_recvmsg+0x127/0x200 net/socket.c:2674 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Memory state around the buggy address: ffff88807d5f9e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807d5f9e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88807d5f9f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807d5f9f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807d5fa000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()") Signed-off-by: Eric Dumazet Reported-by: syzbot Reported-by: Paolo Abeni Tested-by: Paolo Abeni Link: https://lore.kernel.org/r/20220126180714.845362-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 8 +++----- 1 file changed, 3 
insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 22b4c6df1d2b..94568e022001 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -31,18 +31,16 @@ struct ping_group_range { struct inet_hashinfo; struct inet_timewait_death_row { - atomic_t tw_count; - char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)]; + refcount_t tw_refcount; - struct inet_hashinfo *hashinfo; + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_max_tw_buckets; }; struct tcp_fastopen_context; struct netns_ipv4 { - /* Please keep tcp_death_row at first field in netns_ipv4 */ - struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp; + struct inet_timewait_death_row *tcp_death_row; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; -- cgit v1.2.3 From 8033c6c2fed235b3d571b5a5ede302b752bc5c7d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 10:54:12 -0800 Subject: bpf: remove unused static inlines Remove two dead stubs, sk_msg_clear_meta() was never used, use of xskq_cons_is_full() got replaced by xsk_tx_writeable() in v5.10. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220126185412.2776254-1-kuba@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 10 ---------- include/linux/skmsg.h | 5 ----- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 394305a5e02f..2344f793c4dc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1875,11 +1875,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline bool dev_map_can_have_prog(struct bpf_map *map) -{ - return false; -} - static inline void __dev_flush(void) { } @@ -1943,11 +1938,6 @@ static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, return -EOPNOTSUPP; } -static inline bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return false; -} - static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb..ddde5f620901 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -171,11 +171,6 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_next(msg, which) \ sk_msg_iter_var_next(msg->sg.which) -static inline void sk_msg_clear_meta(struct sk_msg *msg) -{ - memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); -} - static inline void sk_msg_init(struct sk_msg *msg) { BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); -- cgit v1.2.3 From a6d95c5a628a09be129f25d5663a7e9db8261f51 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 26 Jan 2022 16:00:18 +0100 Subject: Revert "xfrm: xfrm_state_mtu should return at least 1280 for ipv6" This reverts commit b515d2637276a3810d6595e10ab02c13bfd0b63a. Commit b515d2637276a3810d6595e10ab02c13bfd0b63a ("xfrm: xfrm_state_mtu should return at least 1280 for ipv6") in v5.14 breaks the TCP MSS calculation in ipsec transport mode, resulting complete stalls of TCP connections. This happens when the (P)MTU is 1280 or slighly larger. 
The desired formula for the MSS is: MSS = (MTU - ESP_overhead) - IP header - TCP header However, the above commit clamps the (MTU - ESP_overhead) to a minimum of 1280, turning the formula into MSS = max(MTU - ESP overhead, 1280) - IP header - TCP header With the (P)MTU near 1280, the calculated MSS is too large and the resulting TCP packets never make it to the destination because they are over the actual PMTU. The above commit also causes suboptimal double fragmentation in xfrm tunnel mode, as described in https://lore.kernel.org/netdev/20210429202529.codhwpc7w6kbudug@dwarf.suse.cz/ The original problem the above commit was trying to fix is now fixed by commit 6596a0229541270fb8d38d989f91b78838e5e9da ("xfrm: fix MTU regression"). Signed-off-by: Jiri Bohac Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 743dd1da506e..76aa6f11a540 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1568,7 +1568,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); -- cgit v1.2.3 From ec2444530612a886b406e2830d7f314d1a07d4bb Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 19 Jan 2022 14:39:39 -0800 Subject: psi: Fix "no previous prototype" warnings when CONFIG_CGROUPS=n When CONFIG_CGROUPS is disabled psi code generates the following warnings: kernel/sched/psi.c:1112:21: warning: no previous prototype for 'psi_trigger_create' [-Wmissing-prototypes] 1112 | struct psi_trigger *psi_trigger_create(struct psi_group *group, | ^~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1182:6: warning: no previous prototype for 'psi_trigger_destroy' [-Wmissing-prototypes] 1182 | void psi_trigger_destroy(struct psi_trigger *t) | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1249:10: warning: no previous prototype for 'psi_trigger_poll' [-Wmissing-prototypes] 1249 | __poll_t psi_trigger_poll(void **trigger_ptr, | ^~~~~~~~~~~~~~~~ Change declarations of these functions in the header to provide the prototypes even when they are unused. 
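For context, a minimal out-of-tree reproduction of this class of warning is a global function whose definition is compiled without any prior declaration in scope (example code, not from the kernel tree):

    /* foo.c, built with -Wmissing-prototypes */
    int foo(void)           /* warning: no previous prototype for 'foo' */
    {
            return 0;
    }

    int bar(void);          /* with a declaration visible beforehand ... */
    int bar(void)           /* ... the warning is not emitted */
    {
            return 0;
    }

Keeping the psi_trigger_*() declarations outside the CONFIG_CGROUPS guard gives their definitions in kernel/sched/psi.c such a prototype in every configuration.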
Fixes: 0e94682b73bf ("psi: introduce psi monitor") Reported-by: kernel test robot Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220119223940.787748-2-surenb@google.com --- include/linux/psi.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/psi.h b/include/linux/psi.h index a70ca833c6d7..827970278d62 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ -- cgit v1.2.3 From 2e9589ff809e9232f689acd51da73390e135146a Mon Sep 17 00:00:00 2001 From: xu xin Date: Wed, 26 Jan 2022 07:10:58 +0000 Subject: ipv4: Namespaceify min_adv_mss sysctl knob Different netns has different requirement on the setting of min_adv_mss sysctl which the advertised MSS will be never lower than. Enable min_adv_mss to be configured per network namespace. Signed-off-by: xu xin Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 94568e022001..f0687867b5cd 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -83,6 +83,7 @@ struct netns_ipv4 { u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; + int ip_rt_min_advmss; struct local_ports ip_local_ports; -- cgit v1.2.3 From bd5daba2d02479729526d35de85807aadf6fba20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:10:55 -0800 Subject: mii: remove mii_lpa_to_linkmode_lpa_sgmii() The only caller of mii_lpa_to_linkmode_lpa_sgmii() disappeared in v5.10. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/mii.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 12ea29e04293..b8a1a17a87dd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -387,23 +387,6 @@ mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa) speed_duplex == LPA_SGMII_10FULL); } -/** - * mii_lpa_to_linkmode_adv_sgmii - * @advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_ADVERTISE bits - * to linkmode advertisement settings when in SGMII mode. - * Clears the old value of advertising. - */ -static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising, - u32 lpa) -{ - linkmode_zero(lp_advertising); - - mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa); -} - /** * mii_adv_mod_linkmode_adv_t * @advertising:pointer to destination link mode. 
-- cgit v1.2.3 From b1755400b4be33dbd286272b153579631be2e2ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:10:57 -0800 Subject: net: remove net_invalid_timestamp() No callers since v3.15. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8131d0de7559..60bd2347708c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3899,11 +3899,6 @@ static inline ktime_t net_timedelta(ktime_t t) return ktime_sub(ktime_get_real(), t); } -static inline ktime_t net_invalid_timestamp(void) -{ - return 0; -} - static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; -- cgit v1.2.3 From 08dfa5a19e1f4344ce5d3a5eed4c5529adafe0dc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:10:58 -0800 Subject: net: remove linkmode_change_bit() No callers since v5.7, the initial use case seems pretty esoteric so removing this should not harm the completeness of the API. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/linkmode.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index f8397f300fcd..15e0e0209da4 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr, linkmode_clear_bit(nr, addr); } -static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) -{ - __change_bit(nr, addr); -} - static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) { return test_bit(nr, addr); -- cgit v1.2.3 From 8b0fdcdc3a7d44aff907f0103f5ffb86b12bfe71 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:10:59 -0800 Subject: net: remove bond_slave_has_mac_rcu() No caller since v3.16. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/bonding.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index 83cfd2d70247..7dead855a72d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -698,20 +698,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, return NULL; } -/* Caller must hold rcu_read_lock() for read */ -static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, - const u8 *mac) -{ - struct list_head *iter; - struct slave *tmp; - - bond_for_each_slave_rcu(bond, tmp, iter) - if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) - return tmp; - - return NULL; -} - /* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) { -- cgit v1.2.3 From 560e08eda7969c3ef0639ab05f718be03a49d387 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:00 -0800 Subject: net: ax25: remove route refcount Nothing takes the refcount since v4.9. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/net/ax25.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 526e49589197..cb628c5d7c5b 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -187,18 +187,12 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; char ip_mode; } ax25_route; -static inline void ax25_hold_route(ax25_route *ax25_rt) -{ - refcount_inc(&ax25_rt->refcount); -} - void __ax25_put_route(ax25_route *ax25_rt); extern rwlock_t ax25_route_lock; @@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void) read_unlock(&ax25_route_lock); } -static inline void ax25_put_route(ax25_route *ax25_rt) -{ - if (refcount_dec_and_test(&ax25_rt->refcount)) - __ax25_put_route(ax25_rt); -} - typedef struct { char slave; /* slave_mode? */ struct timer_list slave_timer; /* timeout timer */ -- cgit v1.2.3 From 8b2d546e23bb3588f897089368b5f09e49f7762d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:02 -0800 Subject: ipv6: remove inet6_rsk() and tcp_twsk_ipv6only() The stubs under !CONFIG_IPV6 were missed when real functions got deleted ca. v3.13. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a59d25f19385..1e0f8a31f3de 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } -static inline struct inet6_request_sock * - inet6_rsk(const struct request_sock *rsk) -{ - return NULL; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL -#define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */ -- cgit v1.2.3 From cc81df835c25b108248bad24021a21e77cbb84ac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:04 -0800 Subject: udp: remove inner_udp_hdr() Not used since added in v3.8. Signed-off-by: Jakub Kicinski Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/udp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index ae66dadd8543..254a2654400f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } -static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_inner_transport_header(skb); -} - #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) -- cgit v1.2.3 From 937fca918aacf54f1c9cb00d16d4e999a0569be0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:05 -0800 Subject: udplite: remove udplite_csum_outgoing() Not used since v4.0. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/net/udplite.h | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'include') diff --git a/include/net/udplite.h b/include/net/udplite.h index 9185e45b997f..a3c53110d30b 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -/* Slow-path computation of checksum. Socket is locked. */ -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - const struct udp_sock *up = udp_sk(skb->sk); - int cscov = up->len; - __wsum csum = 0; - - if (up->pcflag & UDPLITE_SEND_CC) { - /* - * Sender has set `partial coverage' option on UDP-Lite socket. - * The special case "up->pcslen == 0" signifies full coverage. - */ - if (up->pcslen < up->len) { - if (0 < up->pcslen) - cscov = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); - } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ - } - - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - skb_queue_walk(&sk->sk_write_queue, skb) { - const int off = skb_transport_offset(skb); - const int len = skb->len - off; - - csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); - - if ((cscov -= len) <= 0) - break; - } - return csum; -} - /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { -- cgit v1.2.3 From d59a67f2f3f39012271ed3d11c338706a011c5c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:06 -0800 Subject: netlink: remove nl_set_extack_cookie_u32() Not used since v5.10. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netlink.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1ec631838af9..bda1c385cffb 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, extack->cookie_len = sizeof(cookie); } -static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, - u32 cookie) -{ - if (!extack) - return; - memcpy(extack->cookie, &cookie, sizeof(cookie)); - extack->cookie_len = sizeof(cookie); -} - void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From 98b608629746946ecc6cda70bf0fd047785a1197 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:07 -0800 Subject: net: sched: remove psched_tdiff_bounded() Not used since v3.9. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e7b21c0b3a6..44a35531952e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void) return PSCHED_NS2TICKS(ktime_get_ns()); } -static inline psched_tdiff_t -psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) -{ - return min(tv1 - tv2, bound); -} - struct qdisc_watchdog { u64 last_expires; struct hrtimer timer; -- cgit v1.2.3 From a459bc9a3a68f2975ee6661fb9c86126a5636b25 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 11:11:08 -0800 Subject: net: sched: remove qdisc_qlen_cpu() Never used since it was added in v5.2. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 472843eedbae..9bab396c1f3b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } -static inline int qdisc_qlen_cpu(const struct Qdisc *q) -{ - return this_cpu_ptr(q->cpu_qstats)->qlen; -} - static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; -- cgit v1.2.3 From d1bc532e99becf104635ed4da6fefa306f452321 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 25 Jan 2022 17:04:43 +0100 Subject: i40e: xsk: Move tmp desc array from driver to pool Move desc_array from the driver to the pool. The reason behind this is that we can then reuse this array as a temporary storage for descriptors in all zero-copy drivers that use the batched interface. This will make it easier to add batching to more drivers. i40e is the only driver that has a batched Tx zero-copy implementation, so no need to touch any other driver. 
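To make the new split concrete, a zero-copy driver using the batched API now borrows the scratch descriptor array from the pool instead of owning one. A rough driver-side sketch follows; the ring structure and function names are hypothetical, only pool->tx_descs and the new xsk_tx_peek_release_desc_batch() signature come from this series:

    #include <net/xdp_sock_drv.h>

    static void xyz_xmit_zc(struct xyz_tx_ring *ring, u32 budget)
    {
            struct xsk_buff_pool *pool = ring->xsk_pool;
            u32 i, nb_pkts;

            /* the core fills pool->tx_descs, no per-driver array needed */
            nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);

            for (i = 0; i < nb_pkts; i++) {
                    struct xdp_desc *desc = &pool->tx_descs[i];
                    dma_addr_t dma = xsk_buff_raw_get_dma(pool, desc->addr);

                    /* post dma and desc->len to the hardware descriptor ring */
            }
    }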
Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20220125160446.78976-6-maciej.fijalkowski@intel.com --- include/net/xdp_sock_drv.h | 5 ++--- include/net/xsk_buff_pool.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 443d45951564..4aa031849668 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -13,7 +13,7 @@ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); @@ -142,8 +142,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, return false; } -static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, - u32 max) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index ddeefc4a1040..5554ee75e7da 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -60,6 +60,7 @@ struct xsk_buff_pool { */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; + struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; -- cgit v1.2.3 From 46531a30364bd483bfa1b041c15d42a196e77e93 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jan 2022 14:09:13 +0000 Subject: cgroup/bpf: fast path skb BPF filtering Even though there is a static key protecting from overhead from cgroup-bpf skb filtering when there is nothing attached, in many cases it's not enough as registering a filter for one type will ruin the fast path for all others. It's observed in production servers I've looked at but also in laptops, where registration is done during init by systemd or something else. Add a per-socket fast path check guarding from such overhead. This affects both receive and transmit paths of TCP, UDP and other protocols. It showed ~1% tx/s improvement in small payload UDP send benchmarks using a real NIC and in a server environment and the number jumps to 2-3% for preemtible kernels. 
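In other words, the gate is now two cheap tests before any program array is walked. Conceptually (this merely restates, in simplified form, the macro change in the hunk below; it is not an additional kernel API):

    /* 1) global static key: any program of this type attached at all? */
    if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) &&
        /* 2) per-socket: is this socket's cgroup effective array anything
         *    other than the shared &bpf_empty_prog_array.hdr? */
        cgroup_bpf_sock_enabled(sk, CGROUP_INET_EGRESS))
            ret = __cgroup_bpf_run_filter_skb(sk, skb, CGROUP_INET_EGRESS);

For sockets whose cgroup has no programs the second test fails on a single pointer comparison, which is what restores the fast path described above.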
Reviewed-by: Stanislav Fomichev Signed-off-by: Pavel Begunkov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/d8c58857113185a764927a46f4b5a058d36d3ec3.1643292455.git.asml.silence@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 24 ++++++++++++++++++++---- include/linux/bpf.h | 13 +++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index b525d8cdc25b..88a51b242adc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -8,6 +8,7 @@ #include #include #include +#include #include struct sock; @@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); +/* Opportunistic check to see whether we have any BPF program attached*/ +static inline bool cgroup_bpf_sock_enabled(struct sock *sk, + enum cgroup_bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array *array; + + array = rcu_access_pointer(cgrp->bpf.effective[type]); + return array != &bpf_empty_prog_array.hdr; +} + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \ + if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -181,7 +194,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ + if (sk_fullsock(__sk) && \ + cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ } \ @@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, kernel_optval) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ @@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, max_optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2344f793c4dc..e3b82ce51445 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1233,6 +1233,19 @@ struct bpf_prog_array { struct bpf_prog_array_item items[]; }; +struct bpf_empty_prog_array { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +}; + +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'bpf_empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +extern struct bpf_empty_prog_array bpf_empty_prog_array; + struct bpf_prog_array *bpf_prog_array_alloc(u32 
prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); -- cgit v1.2.3 From 7472d5a642c94a0ee1882ff3038de72ffe803a01 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 27 Jan 2022 07:46:00 -0800 Subject: compiler_types: define __user as __attribute__((btf_type_tag("user"))) The __user attribute is currently mainly used by sparse for type checking. The attribute indicates whether a memory access is in user memory address space or not. Such information is important during tracing kernel internal functions or data structures as accessing user memory often has different mechanisms compared to accessing kernel memory. For example, the perf-probe needs explicit command line specification to indicate a particular argument or string in user-space memory ([1], [2], [3]). Currently, vmlinux BTF is available in kernel with many distributions. If __user attribute information is available in vmlinux BTF, the explicit user memory access information from users will not be necessary as the kernel can figure it out by itself with vmlinux BTF. Besides the above possible use for perf/probe, another use case is for bpf verifier. Currently, for bpf BPF_PROG_TYPE_TRACING type of bpf programs, users can write direct code like p->m1->m2 and "p" could be a function parameter. Without __user information in BTF, the verifier will assume p->m1 accessing kernel memory and will generate normal loads. Let us say "p" actually tagged with __user in the source code. In such cases, p->m1 is actually accessing user memory and direct load is not right and may produce incorrect result. For such cases, bpf_probe_read_user() will be the correct way to read p->m1. To support encoding __user information in BTF, a new attribute __attribute__((btf_type_tag(""))) is implemented in clang ([4]). For example, if we have #define __user __attribute__((btf_type_tag("user"))) during kernel compilation, the attribute "user" information will be preserved in dwarf. After pahole converting dwarf to BTF, __user information will be available in vmlinux BTF. The following is an example with latest upstream clang (clang14) and pahole 1.23: [$ ~] cat test.c #define __user __attribute__((btf_type_tag("user"))) int foo(int __user *arg) { return *arg; } [$ ~] clang -O2 -g -c test.c [$ ~] pahole -JV test.o ... [1] INT int size=4 nr_bits=32 encoding=SIGNED [2] TYPE_TAG user type_id=1 [3] PTR (anon) type_id=2 [4] FUNC_PROTO (anon) return=1 args=(3 arg) [5] FUNC foo type_id=4 [$ ~] You can see for the function argument "int __user *arg", its type is described as PTR -> TYPE_TAG(user) -> INT The kernel can use this information for bpf verification or other use cases. Current btf_type_tag is only supported in clang (>= clang14) and pahole (>= 1.23). gcc support is also proposed and under development ([5]). 
[1] http://lkml.kernel.org/r/155789874562.26965.10836126971405890891.stgit@devnote2 [2] http://lkml.kernel.org/r/155789872187.26965.4468456816590888687.stgit@devnote2 [3] http://lkml.kernel.org/r/155789871009.26965.14167558859557329331.stgit@devnote2 [4] https://reviews.llvm.org/D111199 [5] https://lore.kernel.org/bpf/0cbeb2fb-1a18-f690-e360-24b1c90c2a91@fb.com/ Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220127154600.652613-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/compiler_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..3f31ff400432 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -31,6 +31,9 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __kernel # ifdef STRUCTLEAK_PLUGIN # define __user __attribute__((user)) +# elif defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define __user __attribute__((btf_type_tag("user"))) # else # define __user # endif -- cgit v1.2.3 From c6f1bfe89ac95dc829dcb4ed54780da134ac5fce Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 27 Jan 2022 07:46:06 -0800 Subject: bpf: reject program if a __user tagged memory accessed in kernel way BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type of bpf programs, e.g., a->b. If "a" is a pointer pointing to kernel memory, bpf verifier will allow user to write code in C like a->b and the verifier will translate it to a kernel load properly. If "a" is a pointer to user memory, it is expected that bpf developer should be bpf_probe_read_user() helper to get the value a->b. Without utilizing BTF __user tagging information, current verifier will assume that a->b is a kernel memory access and this may generate incorrect result. Now BTF contains __user information, it can check whether the pointer points to a user memory or not. If it is, the verifier can reject the program and force users to use bpf_probe_read_user() helper explicitly. In the future, we can easily extend btf_add_space for other address space tagging, for example, rcu/percpu etc. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 9 ++++++--- include/linux/btf.h | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e3b82ce51445..6eb0b180d33b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -332,7 +332,10 @@ enum bpf_type_flag { */ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_ALLOC, + /* MEM is in user address space. */ + MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_USER, }; /* Max number of base types. 
*/ @@ -588,7 +591,7 @@ struct bpf_verifier_ops { const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); }; struct bpf_prog_offload_ops { @@ -1780,7 +1783,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id); diff --git a/include/linux/btf.h b/include/linux/btf.h index b12cfe3b12bb..f6c43dd513fa 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -238,6 +238,11 @@ static inline bool btf_type_is_var(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; } +static inline bool btf_type_is_type_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; +} + /* union is only a special case of struct: * all its offsetof(member) == 0 */ -- cgit v1.2.3 From 9059b04b4108be71397941f4665d5aa79783125a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 9 Jan 2022 21:46:34 +0200 Subject: net/mlx5: Remove unused TIR modify bitmask enums struct mlx5_ifc_modify_tir_bitmask_bits is used for the bitmask of MODIFY_TIR operations. Remove the unused bitmask enums. Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..27145c4d6820 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -63,13 +63,6 @@ enum { MLX5_EVENT_TYPE_CODING_FPGA_QP_ERROR = 0x21 }; -enum { - MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, - MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, - MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, - MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 -}; - enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, -- cgit v1.2.3 From 8cda7a4f96e435be2fd074009d69521d973d7d31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jan 2022 17:57:33 -0500 Subject: drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates Add a new CTX ioctl operation to set stable pstates for profiling. When creating traces for tools like RGP or using SPM or doing performance profiling, it's required to enable a special stable profiling power state on the GPU. These profiling states set fixed clocks and disable certain other power features like powergating which may impact the results. Historically, these profiling pstates were enabled via sysfs, but this adds an interface to enable it via the CTX ioctl from the application. Since the power state is global only one application can set it at a time, so if multiple applications try and use it only the first will get it, the ioctl will return -EBUSY for others. The sysfs interface will override whatever has been set by this interface. 
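From userspace this is reachable through the existing CTX ioctl. A minimal sketch of the intended call, assuming libdrm's drmCommandWriteRead() wrapper and a context created earlier (error handling trimmed; the kernel-side handler is not part of this include/ diff, so the use of in.flags follows the header comments below):

    struct drm_amdgpu_ctx args = {};
    int r;

    args.in.op = AMDGPU_CTX_OP_SET_STABLE_PSTATE;
    args.in.ctx_id = ctx_id;                         /* from AMDGPU_CTX_OP_ALLOC_CTX */
    args.in.flags = AMDGPU_CTX_STABLE_PSTATE_PEAK;   /* fixed peak clocks */

    r = drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
    if (r == -EBUSY)
            ; /* another process already owns the stable pstate */

Querying the current setting works the same way with AMDGPU_CTX_OP_GET_STABLE_PSTATE, with the result returned in args.out.pstate.flags.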
Mesa MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 v2: don't default r = 0; v3: rebase on Evan's PM cleanup Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be4f9111f478..76b580d10a52 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -206,6 +206,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -238,10 +240,18 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_PRIORITY_HIGH 512 #define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; - /** For future use, no flags defined so far */ + /** Flags */ __u32 flags; __u32 ctx_id; /** AMDGPU_CTX_PRIORITY_* */ @@ -262,6 +272,11 @@ union drm_amdgpu_ctx_out { /** Reset status since the last call of the ioctl. */ __u32 reset_status; } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; }; union drm_amdgpu_ctx { -- cgit v1.2.3 From f37a4cc6bb0ba08c2d9fd7d18a1da87161cbb7f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jan 2022 00:36:28 +0000 Subject: udp6: pass flow in ip6_make_skb together with cork Another preparation patch. inet_cork_full already contains a field for iflow, so we can avoid passing a separate struct iflow6 into __ip6_append_data() and ip6_make_skb(), and use the flow stored in inet_cork_full. Make sure callers set cork->fl, i.e. we init it in ip6_append_data() and before calling ip6_make_skb(). Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3afcb128e064..5e0b56d66724 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1020,7 +1020,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); -- cgit v1.2.3 From 31ed2261e88fbd1eb62fc870ef2c6b2cf2951336 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jan 2022 00:36:31 +0000 Subject: ipv6: partially inline ipv6_fixup_options Inline a part of ipv6_fixup_options() to avoid extra overhead on function call if opt is NULL. 
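The win is on callers that mostly pass a NULL opt, for example a sendmsg fast path; with the wrapper below, the common opt == NULL case reduces to one inlined test (illustrative caller, unchanged by this patch):

    struct ipv6_txoptions opt_space;

    opt = ipv6_fixup_options(&opt_space, opt);  /* compiles to a NULL test, no call */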
Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5e0b56d66724..082f30256f59 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -437,8 +437,16 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt); +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt); + +static inline struct ipv6_txoptions * +ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) +{ + if (!opt) + return NULL; + return __ipv6_fixup_options(opt_space, opt); +} bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); -- cgit v1.2.3 From eb90686d5d10fef9cadd9c0eb30f3fee66d2b2a5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:55 +0800 Subject: crypto: sm3 - create SM3 stand-alone library Stand-alone implementation of the SM3 algorithm. It is designed to have as little dependencies as possible. In other cases you should generally use the hash APIs from include/crypto/hash.h. Especially when hashing large amounts of data as those APIs may be hw-accelerated. In the new SM3 stand-alone library, sm3_transform() has also been optimized, instead of simply using the code in sm3_generic. Signed-off-by: Tianjia Zhang Reviewed-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 42ea21289ba9..b5fb6d1bf247 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -1,5 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Common values for SM3 algorithm + * + * Copyright (C) 2017 ARM Limited or its affiliates. + * Copyright (C) 2017 Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang */ #ifndef _CRYPTO_SM3_H @@ -39,4 +44,31 @@ extern int crypto_sm3_final(struct shash_desc *desc, u8 *out); extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *hash); + +/* + * Stand-alone implementation of the SM3 algorithm. It is designed to + * have as little dependencies as possible so it can be used in the + * kexec_file purgatory. In other cases you should generally use the + * hash APIs from include/crypto/hash.h. Especially when hashing large + * amounts of data as those APIs may be hw-accelerated. 
+ * + * For details see lib/crypto/sm3.c + */ + +static inline void sm3_init(struct sm3_state *sctx) +{ + sctx->state[0] = SM3_IVA; + sctx->state[1] = SM3_IVB; + sctx->state[2] = SM3_IVC; + sctx->state[3] = SM3_IVD; + sctx->state[4] = SM3_IVE; + sctx->state[5] = SM3_IVF; + sctx->state[6] = SM3_IVG; + sctx->state[7] = SM3_IVH; + sctx->count = 0; +} + +void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); +void sm3_final(struct sm3_state *sctx, u8 *out); + #endif -- cgit v1.2.3 From b4784a45ea69577f21f89898c71127774a090a2a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:58 +0800 Subject: crypto: sm3 - make dependent on sm3 library SM3 generic library is stand-alone implementation, it is necessary making the sm3-generic implementation to depends on SM3 library. The functions crypto_sm3_*() provided by sm3_generic is no longer exported. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index b5fb6d1bf247..1f021ad0533f 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -35,16 +35,6 @@ struct sm3_state { u8 buffer[SM3_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sm3_final(struct shash_desc *desc, u8 *out); - -extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - /* * Stand-alone implementation of the SM3 algorithm. It is designed to * have as little dependencies as possible so it can be used in the -- cgit v1.2.3 From c265a3a6690b093e542aceb4a7bf8bf6c577e42b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 25 Jan 2022 13:25:40 +0100 Subject: net: mac802154: Explain the use of ieee802154_wake/stop_queue() It is not straightforward to the newcomer that a single skb can currently be sent at a time and that the internal process is to stop the queue when processing a frame before re-enabling it. Make this clear by documenting the ieee802154_wake/stop_queue() helpers. Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20220125122540.855604-4-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/mac802154.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d524ffb9eb25..2c3bbc6645ba 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -464,6 +464,12 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, * ieee802154_wake_queue - wake ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we had to stop the queue to + * avoid new skb to come during the transmission. The queue then needs to be + * woken up after the operation. + * * Drivers should use this function instead of netif_wake_queue. */ void ieee802154_wake_queue(struct ieee802154_hw *hw); @@ -472,6 +478,12 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw); * ieee802154_stop_queue - stop ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). 
* + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we need to tell upper layers to + * stop giving us new skbs while we are busy with the transmitted one. The queue + * must then be stopped before transmitting. + * * Drivers should use this function instead of netif_stop_queue. */ void ieee802154_stop_queue(struct ieee802154_hw *hw); -- cgit v1.2.3 From dbd171df8cc02a7c286779a048082624fc4dce8e Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 17 Jan 2022 08:05:08 +0100 Subject: media: memory: mtk-smi: Get rid of mtk_smi_larb_get/put After adding device_link between the iommu consumer and smi-larb, the pm_runtime_get(_sync) of smi-larb and smi-common will be called automatically. we can get rid of mtk_smi_larb_get/put. Signed-off-by: Yong Wu Reviewed-by: Evan Green Acked-by: Krzysztof Kozlowski Acked-by: Matthias Brugger Reviewed-by: Dafna Hirschfeld Tested-by: Frank Wunderlich # BPI-R2/MT7623 Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/soc/mediatek/smi.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 15e3397cec58..11f7d6b59642 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -19,26 +19,6 @@ struct mtk_smi_larb_iommu { unsigned char bank[32]; }; -/* - * mtk_smi_larb_get: Enable the power domain and clocks for this local arbiter. - * It also initialize some basic setting(like iommu). - * mtk_smi_larb_put: Disable the power domain and clocks for this local arbiter. - * Both should be called in non-atomic context. - * - * Returns 0 if successful, negative on failure. - */ -int mtk_smi_larb_get(struct device *larbdev); -void mtk_smi_larb_put(struct device *larbdev); - -#else - -static inline int mtk_smi_larb_get(struct device *larbdev) -{ - return 0; -} - -static inline void mtk_smi_larb_put(struct device *larbdev) { } - #endif #endif -- cgit v1.2.3 From b5b3d10ef638fcadc56609961875bf157a92aaa4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jan 2022 08:33:49 -0800 Subject: net: mii: remove mii_lpa_mod_linkmode_lpa_sgmii() Vladimir points out that since we removed mii_lpa_to_linkmode_lpa_sgmii(), mii_lpa_mod_linkmode_lpa_sgmii() is also no longer called. Suggested-by: Vladimir Oltean Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/mii.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index b8a1a17a87dd..5ee13083cec7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -354,39 +354,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) return result; } -/** - * mii_lpa_mod_linkmode_adv_sgmii - * @lp_advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_LPA bits to - * linkmode advertisement settings for SGMII. - * Leaves other bits unchanged. 
- */ -static inline void -mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa) -{ - u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK; - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10FULL); -} - /** * mii_adv_mod_linkmode_adv_t * @advertising:pointer to destination link mode. -- cgit v1.2.3 From 9690ae60429020f38e4aa2540c306f27eb021bc0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jan 2022 10:42:59 -0800 Subject: ethtool: add header/data split indication For applications running on a mix of platforms it's useful to have a clear indication whether host's NIC supports the geometry requirements of TCP zero-copy. TCP zero-copy Rx requires data to be neatly placed into memory pages. Most NICs can't do that. This patch is adding GET support only, since the NICs I work with either always have the feature enabled or enable it whenever MTU is set to jumbo. In other words I don't need SET. But adding set should be trivial. (The only note on SET is that we will likely want the setting to be "sticky" and use 0 / `unknown` to reset it back to driver default.) Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ include/uapi/linux/ethtool_netlink.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 11efc45de66a..e0853f48b75e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -70,9 +70,11 @@ enum { /** * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. + * @tcp_data_split: Scatter packet headers and data to separate buffers */ struct kernel_ethtool_ringparam { u32 rx_buf_len; + u8 tcp_data_split; }; /** diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cca6e474a085..417d4280d7b5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -318,6 +318,12 @@ enum { /* RINGS */ +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + enum { ETHTOOL_A_RINGS_UNSPEC, ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ @@ -330,6 +336,7 @@ enum { ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 6cdef8a6ee748ec522aabee049633b16b4115f73 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Jan 2022 12:09:35 -0800 Subject: SUNRPC: add netns refcount tracker to struct svc_xprt struct svc_xprt holds a long lived reference to a netns, it is worth tracking it. Signed-off-by: Eric Dumazet Acked-by: Chuck Lever Signed-off-by: David S. 
Miller --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 571f605bc91e..382af90320ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -88,6 +88,7 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + netns_tracker ns_tracker; const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ -- cgit v1.2.3 From b9a0d6d143ec63132beff04802578c499083f87c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Jan 2022 12:09:37 -0800 Subject: SUNRPC: add netns refcount tracker to struct rpc_xprt Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b..3cdc8d878d81 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -284,6 +284,7 @@ struct rpc_xprt { } stat; struct net *xprt_net; + netns_tracker ns_tracker; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -- cgit v1.2.3 From a299299963aeba13bac88020e5f8fd293fbe10b2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Jan 2022 17:24:09 +0100 Subject: media: v4l2-subdev: Drop .set_mbus_config() operation The .set_mbus_config() operation is deprecated, and nothing in the kernel uses it. Drop it. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 15 +++++++-------- include/media/v4l2-subdev.h | 13 ------------- 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 841e190aedd9..96af9cedacde 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -15,15 +15,14 @@ * How to use the V4L2_MBUS_* flags: * Flags are defined for each of the possible states and values of a media * bus configuration parameter. One and only one bit of each group of flags - * shall be set by the users of the v4l2_subdev_pad_ops.get_mbus_config and - * v4l2_subdev_pad_ops.set_mbus_config operations to ensure that no - * conflicting settings are specified when reporting and setting the media bus - * configuration with the two operations respectively. For example, it is - * invalid to set or clear both the V4L2_MBUS_HSYNC_ACTIVE_HIGH and the + * shall be set by the users of the v4l2_subdev_pad_ops.get_mbus_config + * operation to ensure that no conflicting settings are specified when + * reporting the media bus configuration. For example, it is invalid to set or + * clear both the V4L2_MBUS_HSYNC_ACTIVE_HIGH and the * V4L2_MBUS_HSYNC_ACTIVE_LOW flag at the same time. Instead either flag - * V4L2_MBUS_HSYNC_ACTIVE_HIGH or flag V4L2_MBUS_HSYNC_ACTIVE_LOW shall be - * set. The same is true for the V4L2_MBUS_CSI2_1/2/3/4_LANE flags group: only - * one of these four bits shall be set. + * V4L2_MBUS_HSYNC_ACTIVE_HIGH or flag V4L2_MBUS_HSYNC_ACTIVE_LOW shall be set. + * The same is true for the V4L2_MBUS_CSI2_1/2/3/4_LANE flags group: only one + * of these four bits shall be set. * * TODO: replace the existing V4L2_MBUS_* flags with structures of fields * to avoid conflicting settings. 
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 95ec18c2f49c..6c153b33bb04 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -715,17 +715,6 @@ struct v4l2_subdev_state { * this operation as close as possible to stream on time. The * operation shall fail if the pad index it has been called on * is not valid or in case of unrecoverable failures. - * - * @set_mbus_config: set the media bus configuration of a remote sub-device. - * This operations is intended to allow, in combination with - * the get_mbus_config operation, the negotiation of media bus - * configuration parameters between media sub-devices. The - * operation shall not fail if the requested configuration is - * not supported, but the driver shall update the content of - * the %config argument to reflect what has been actually - * applied to the hardware. The operation shall fail if the - * pad index it has been called on is not valid or in case of - * unrecoverable failures. */ struct v4l2_subdev_pad_ops { int (*init_cfg)(struct v4l2_subdev *sd, @@ -768,8 +757,6 @@ struct v4l2_subdev_pad_ops { struct v4l2_mbus_frame_desc *fd); int (*get_mbus_config)(struct v4l2_subdev *sd, unsigned int pad, struct v4l2_mbus_config *config); - int (*set_mbus_config)(struct v4l2_subdev *sd, unsigned int pad, - struct v4l2_mbus_config *config); }; /** -- cgit v1.2.3 From 94d964e58ad6ba907c4169be99267ef517796614 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Jan 2022 17:24:10 +0100 Subject: media: v4l2-fwnode: Move bus config structure to v4l2_mediabus.h To prepare for usage of the v4l2_fwnode_bus_* data structures to describe bus configuration in the subdev .get_mbus_config() operation, rename the structures with a v4l2_mbus_config_ prefix instead of v4l2_fwnode_bus_, and move them to v4l2_mediabus.h. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 61 +++++-------------------------------------- include/media/v4l2-mediabus.h | 49 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index 9c97f1dbd1c6..feb132df45a3 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -25,68 +25,19 @@ struct fwnode_handle; struct v4l2_async_notifier; struct v4l2_async_subdev; -#define V4L2_FWNODE_CSI2_MAX_DATA_LANES 8 - -/** - * struct v4l2_fwnode_bus_mipi_csi2 - MIPI CSI-2 bus data structure - * @flags: media bus (V4L2_MBUS_*) flags - * @data_lanes: an array of physical data lane indexes - * @clock_lane: physical lane index of the clock lane - * @num_data_lanes: number of data lanes - * @lane_polarities: polarity of the lanes. The order is the same of - * the physical lanes. 
- */ -struct v4l2_fwnode_bus_mipi_csi2 { - unsigned int flags; - unsigned char data_lanes[V4L2_FWNODE_CSI2_MAX_DATA_LANES]; - unsigned char clock_lane; - unsigned char num_data_lanes; - bool lane_polarities[1 + V4L2_FWNODE_CSI2_MAX_DATA_LANES]; -}; - -/** - * struct v4l2_fwnode_bus_parallel - parallel data bus data structure - * @flags: media bus (V4L2_MBUS_*) flags - * @bus_width: bus width in bits - * @data_shift: data shift in bits - */ -struct v4l2_fwnode_bus_parallel { - unsigned int flags; - unsigned char bus_width; - unsigned char data_shift; -}; - -/** - * struct v4l2_fwnode_bus_mipi_csi1 - CSI-1/CCP2 data bus structure - * @clock_inv: polarity of clock/strobe signal - * false - not inverted, true - inverted - * @strobe: false - data/clock, true - data/strobe - * @lane_polarity: the polarities of the clock (index 0) and data lanes - * index (1) - * @data_lane: the number of the data lane - * @clock_lane: the number of the clock lane - */ -struct v4l2_fwnode_bus_mipi_csi1 { - unsigned char clock_inv:1; - unsigned char strobe:1; - bool lane_polarity[2]; - unsigned char data_lane; - unsigned char clock_lane; -}; - /** * struct v4l2_fwnode_endpoint - the endpoint data structure * @base: fwnode endpoint of the v4l2_fwnode * @bus_type: bus type * @bus: bus configuration data structure - * @bus.parallel: embedded &struct v4l2_fwnode_bus_parallel. + * @bus.parallel: embedded &struct v4l2_mbus_config_parallel. * Used if the bus is parallel. - * @bus.mipi_csi1: embedded &struct v4l2_fwnode_bus_mipi_csi1. + * @bus.mipi_csi1: embedded &struct v4l2_mbus_config_mipi_csi1. * Used if the bus is MIPI Alliance's Camera Serial * Interface version 1 (MIPI CSI1) or Standard * Mobile Imaging Architecture's Compact Camera Port 2 * (SMIA CCP2). - * @bus.mipi_csi2: embedded &struct v4l2_fwnode_bus_mipi_csi2. + * @bus.mipi_csi2: embedded &struct v4l2_mbus_config_mipi_csi2. * Used if the bus is MIPI Alliance's Camera Serial * Interface version 2 (MIPI CSI2). * @link_frequencies: array of supported link frequencies @@ -100,9 +51,9 @@ struct v4l2_fwnode_endpoint { */ enum v4l2_mbus_type bus_type; struct { - struct v4l2_fwnode_bus_parallel parallel; - struct v4l2_fwnode_bus_mipi_csi1 mipi_csi1; - struct v4l2_fwnode_bus_mipi_csi2 mipi_csi2; + struct v4l2_mbus_config_parallel parallel; + struct v4l2_mbus_config_mipi_csi1 mipi_csi1; + struct v4l2_mbus_config_mipi_csi2 mipi_csi2; } bus; u64 *link_frequencies; unsigned int nr_of_link_frequencies; diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 96af9cedacde..9c4970fbd8ea 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -92,6 +92,55 @@ V4L2_MBUS_CSI2_CHANNEL_2 | \ V4L2_MBUS_CSI2_CHANNEL_3) +#define V4L2_MBUS_CSI2_MAX_DATA_LANES 8 + +/** + * struct v4l2_mbus_config_mipi_csi2 - MIPI CSI-2 data bus configuration + * @flags: media bus (V4L2_MBUS_*) flags + * @data_lanes: an array of physical data lane indexes + * @clock_lane: physical lane index of the clock lane + * @num_data_lanes: number of data lanes + * @lane_polarities: polarity of the lanes. The order is the same of + * the physical lanes. 
+ */ +struct v4l2_mbus_config_mipi_csi2 { + unsigned int flags; + unsigned char data_lanes[V4L2_MBUS_CSI2_MAX_DATA_LANES]; + unsigned char clock_lane; + unsigned char num_data_lanes; + bool lane_polarities[1 + V4L2_MBUS_CSI2_MAX_DATA_LANES]; +}; + +/** + * struct v4l2_mbus_config_parallel - parallel data bus configuration + * @flags: media bus (V4L2_MBUS_*) flags + * @bus_width: bus width in bits + * @data_shift: data shift in bits + */ +struct v4l2_mbus_config_parallel { + unsigned int flags; + unsigned char bus_width; + unsigned char data_shift; +}; + +/** + * struct v4l2_mbus_config_mipi_csi1 - CSI-1/CCP2 data bus configuration + * @clock_inv: polarity of clock/strobe signal + * false - not inverted, true - inverted + * @strobe: false - data/clock, true - data/strobe + * @lane_polarity: the polarities of the clock (index 0) and data lanes + * index (1) + * @data_lane: the number of the data lane + * @clock_lane: the number of the clock lane + */ +struct v4l2_mbus_config_mipi_csi1 { + unsigned char clock_inv:1; + unsigned char strobe:1; + bool lane_polarity[2]; + unsigned char data_lane; + unsigned char clock_lane; +}; + /** * enum v4l2_mbus_type - media bus type * @V4L2_MBUS_UNKNOWN: unknown bus type, no V4L2 mediabus configuration -- cgit v1.2.3 From d01ffb9eee4af165d83b08dd73ebdf9fe94a519b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 28 Jan 2022 12:47:16 +0800 Subject: ax25: add refcount in ax25_dev to avoid UAF bugs If we dereference ax25_dev after we call kfree(ax25_dev) in ax25_dev_device_down(), it will lead to concurrency UAF bugs. There are eight syscall functions suffer from UAF bugs, include ax25_bind(), ax25_release(), ax25_connect(), ax25_ioctl(), ax25_getname(), ax25_sendmsg(), ax25_getsockopt() and ax25_info_show(). One of the concurrency UAF can be shown as below: (USE) | (FREE) | ax25_device_event | ax25_dev_device_down ax25_bind | ... ... | kfree(ax25_dev) ax25_fillin_cb() | ... ax25_fillin_cb_from_dev() | ... | The root cause of UAF bugs is that kfree(ax25_dev) in ax25_dev_device_down() is not protected by any locks. When ax25_dev, which there are still pointers point to, is released, the concurrency UAF bug will happen. This patch introduces refcount into ax25_dev in order to guarantee that there are no pointers point to it when ax25_dev is released. Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- include/net/ax25.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 526e49589197..50b417df6221 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -239,6 +239,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -293,6 +294,15 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +#define ax25_dev_hold(__ax25_dev) \ + refcount_inc(&((__ax25_dev)->refcount)) + +static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; -- cgit v1.2.3 From 3c75c0ea5da749bd1efebd1387f2e5011b8c7d78 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Jan 2022 16:52:48 +0100 Subject: ASoC: soc-pcm: Fix DPCM lockdep warning due to nested stream locks The recent change for DPCM locking caused spurious lockdep warnings. 
Actually the warnings are false-positive, as those are triggered due to the nested stream locks for FE and BE. Since both locks belong to the same lock class, lockdep sees it as if a deadlock. For fixing this, we need to take PCM stream locks for BE with the nested lock primitives. Since currently snd_pcm_stream_lock*() helper assumes only the top-level single locking, a new helper function snd_pcm_stream_lock_irqsave_nested() is defined for a single-depth nested lock, which is now used in the BE DAI trigger that is always performed inside a FE stream lock. Fixes: b2ae80663008 ("ASoC: soc-pcm: serialize BE triggers") Reported-and-tested-by: Hans de Goede Reported-and-tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/73018f3c-9769-72ea-0325-b3f8e2381e30@redhat.com Link: https://lore.kernel.org/alsa-devel/9a0abddd-49e9-872d-2f00-a1697340f786@samsung.com Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20220119155249.26754-2-tiwai@suse.de Signed-off-by: Mark Brown --- include/sound/pcm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 33451f8ff755..524220fe1af6 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -614,6 +614,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); +unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream @@ -632,6 +633,20 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); +/** + * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking + * @substream: PCM substream + * @flags: irq flags + * + * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with + * the single-depth lockdep subclass. + */ +#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ + } while (0) + /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator -- cgit v1.2.3 From 06feec6005c9d9500cd286ec440aabf8b2ddd94d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Jan 2022 22:50:39 +0300 Subject: ASoC: hdmi-codec: Fix OOB memory accesses Correct size of iec_status array by changing it to the size of status array of the struct snd_aes_iec958. This fixes out-of-bounds slab read accesses made by memcpy() of the hdmi-codec driver. This problem is reported by KASAN. 
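Only the UAPI side of the fix is in this include/ hunk; the driver side (sound/soc/codecs/hdmi-codec.c, not shown here) then sizes its private copy of the channel-status bytes with the new constant so that copying a full snd_aes_iec958 status block stays in bounds, roughly:

    /* sketch of the driver-side counterpart, field and variable names abridged */
    struct hdmi_codec_priv {
            /* ... */
            unsigned char iec_status[AES_IEC958_STATUS_SIZE];
    };

    /* previously read past the end of the smaller iec_status[] */
    memcpy(hp.iec.status, hcp->iec_status, sizeof(hp.iec.status));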
Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20220112195039.1329-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/uapi/sound/asound.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index ff7e638221c5..228279ea0670 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ -- cgit v1.2.3 From b2a90f4fcb146d0e033203ab646f0fd22cfa947f Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Jan 2022 11:20:22 +0100 Subject: media: lirc: remove unused lirc features These features have never been implemented by any lirc driver, including staging or out of tree drivers. The ioctls for these features were removed in commit d55f09abe24b ("[media] lirc.h: remove several unused ioctls"). So, we can safely remove them. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 9919f2062b14..a1f9c26ea537 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -72,11 +72,9 @@ #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) #define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 #define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 #define LIRC_CAN_MEASURE_CARRIER 0x02000000 #define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 @@ -84,8 +82,6 @@ #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - /*** IOCTL commands for lirc driver ***/ #define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) -- cgit v1.2.3 From 950170d6d2a5d3c0c959696d2440e5c8dfd02896 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 15 Jan 2022 11:12:35 +0100 Subject: media: rc-core: rename ir_raw_event_reset to ir_raw_event_overflow The driver reports a reset event when the hardware reports an overflow. There is no reason to have a generic "reset" event. 
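For illustration, a receive driver that detects a hardware FIFO overrun would now report it roughly as below (a sketch only; the status read helper and overflow bit are hypothetical, not taken from any real driver):

#include <linux/interrupt.h>
#include <media/rc-core.h>

static irqreturn_t example_rc_isr(int irq, void *dev_id)
{
	struct rc_dev *rc = dev_id;
	u32 status = example_hw_read_status(rc);	/* hypothetical MMIO read */

	if (status & EXAMPLE_STAT_FIFO_OVERFLOW) {	/* hypothetical status bit */
		/* Formerly ir_raw_event_reset(); tells the core that samples were lost. */
		ir_raw_event_overflow(rc);
		return IRQ_HANDLED;
	}

	/* ... otherwise store pulse/space samples with ir_raw_event_store() ... */
	return IRQ_HANDLED;
}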
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 33b3f7fcf92e..803349599c27 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -298,7 +298,7 @@ struct ir_raw_event { u8 duty_cycle; unsigned pulse:1; - unsigned reset:1; + unsigned overflow:1; unsigned timeout:1; unsigned carrier_report:1; }; @@ -321,9 +321,9 @@ int ir_raw_encode_scancode(enum rc_proto protocol, u32 scancode, struct ir_raw_event *events, unsigned int max); int ir_raw_encode_carrier(enum rc_proto protocol); -static inline void ir_raw_event_reset(struct rc_dev *dev) +static inline void ir_raw_event_overflow(struct rc_dev *dev) { - ir_raw_event_store(dev, &((struct ir_raw_event) { .reset = true })); + ir_raw_event_store(dev, &((struct ir_raw_event) { .overflow = true })); dev->idle = true; ir_raw_event_handle(dev); } -- cgit v1.2.3 From 68a99f6a0ebfe9101ea79ba5af1c407a5ad4f629 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 15 Jan 2022 11:19:11 +0100 Subject: media: lirc: report ir receiver overflow If the driver reports that the hardware had an overflow, report this to userspace. It would be nice to know when this happens, and not just get a long space. This change has been tested with lircd, ir-ctl, and ir-keytable. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index a1f9c26ea537..21c69a6a100d 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -16,14 +16,16 @@ #define LIRC_MODE2_PULSE 0x01000000 #define LIRC_MODE2_FREQUENCY 0x02000000 #define LIRC_MODE2_TIMEOUT 0x03000000 +#define LIRC_MODE2_OVERFLOW 0x04000000 #define LIRC_VALUE_MASK 0x00FFFFFF #define LIRC_MODE2_MASK 0xFF000000 -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_SPACE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) +#define LIRC_PULSE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) +#define LIRC_FREQUENCY(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) +#define LIRC_TIMEOUT(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_OVERFLOW(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_OVERFLOW) #define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) #define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) @@ -32,6 +34,7 @@ #define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) #define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) #define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) +#define LIRC_IS_OVERFLOW(val) (LIRC_MODE2(val) == LIRC_MODE2_OVERFLOW) /* used heavily by lirc userspace */ #define lirc_t int -- cgit v1.2.3 From ca0acb511c21738b32386ce0f85c284b351d919e Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 28 Jan 2022 17:14:25 +0530 Subject: device property: Add fwnode_irq_get_byname Add fwnode_irq_get_byname() to get an interrupt by name from either ACPI table or Device Tree, whichever is used for enumeration. 
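A usage sketch before the ACPI details below (the IRQ name, handler and driver are made up for illustration):

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/property.h>

/* Look up the "wakeup" interrupt of a device by name; the same call works
 * whether the device was enumerated from DT or from ACPI. */
static int example_request_wakeup_irq(struct device *dev, irq_handler_t handler)
{
	int irq = fwnode_irq_get_byname(dev_fwnode(dev), "wakeup");

	if (irq < 0)
		return irq;

	return devm_request_irq(dev, irq, handler, 0, "example-wakeup", dev);
}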
In the ACPI case, this allows us to use 'interrupt-names' in _DSD which can be mapped to Interrupt() resource by index. The implementation is similar to 'interrupt-names' in the Device Tree. Signed-off-by: Akhil R Reviewed-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/property.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 7399a0b45f98..95d56a562b6a 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -121,6 +121,7 @@ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode); void fwnode_handle_put(struct fwnode_handle *fwnode); int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); +int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index); -- cgit v1.2.3 From a263a84088f689bf0c1552a510b25d0bcc45fcae Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 28 Jan 2022 17:14:27 +0530 Subject: i2c: smbus: Use device_*() functions instead of of_*() Change of_*() functions to device_*() for firmware agnostic usage. This allows the smbus_alert interrupt to be used without any changes in the controller drivers when using the ACPI table. Signed-off-by: Akhil R Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 1ef421818d3a..95cf902e0bda 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -30,10 +30,10 @@ struct i2c_client *i2c_new_smbus_alert_device(struct i2c_adapter *adapter, struct i2c_smbus_alert_setup *setup); int i2c_handle_smbus_alert(struct i2c_client *ara); -#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_OF) -int of_i2c_setup_smbus_alert(struct i2c_adapter *adap); +#if IS_ENABLED(CONFIG_I2C_SMBUS) +int i2c_setup_smbus_alert(struct i2c_adapter *adap); #else -static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) +static inline int i2c_setup_smbus_alert(struct i2c_adapter *adap) { return 0; } -- cgit v1.2.3 From a81595729be6561d25ae64d5bbae059058234363 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Tue, 11 Jan 2022 16:54:11 -0500 Subject: dt-bindings: imx: Add clock binding for i.MXRT1050 Add the clock binding doc for i.MXRT1050. 
Signed-off-by: Giulio Benetti Signed-off-by: Jesse Taube [Jesse: added clocks from IMXRT1050_CLK_IPG_PDOF to IMXRT1050_CLK_DMA_MUX and moved IMXRT1050_CLK_END on] Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20220111215415.2075257-4-Mr.Bossman075@gmail.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imxrt1050-clock.h | 72 +++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/dt-bindings/clock/imxrt1050-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/imxrt1050-clock.h b/include/dt-bindings/clock/imxrt1050-clock.h new file mode 100644 index 000000000000..93bef0832d16 --- /dev/null +++ b/include/dt-bindings/clock/imxrt1050-clock.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ +/* + * Copyright(C) 2019 + * Author(s): Giulio Benetti + */ + +#ifndef __DT_BINDINGS_CLOCK_IMXRT1050_H +#define __DT_BINDINGS_CLOCK_IMXRT1050_H + +#define IMXRT1050_CLK_DUMMY 0 +#define IMXRT1050_CLK_CKIL 1 +#define IMXRT1050_CLK_CKIH 2 +#define IMXRT1050_CLK_OSC 3 +#define IMXRT1050_CLK_PLL2_PFD0_352M 4 +#define IMXRT1050_CLK_PLL2_PFD1_594M 5 +#define IMXRT1050_CLK_PLL2_PFD2_396M 6 +#define IMXRT1050_CLK_PLL3_PFD0_720M 7 +#define IMXRT1050_CLK_PLL3_PFD1_664_62M 8 +#define IMXRT1050_CLK_PLL3_PFD2_508_24M 9 +#define IMXRT1050_CLK_PLL3_PFD3_454_74M 10 +#define IMXRT1050_CLK_PLL2_198M 11 +#define IMXRT1050_CLK_PLL3_120M 12 +#define IMXRT1050_CLK_PLL3_80M 13 +#define IMXRT1050_CLK_PLL3_60M 14 +#define IMXRT1050_CLK_PLL1_BYPASS 15 +#define IMXRT1050_CLK_PLL2_BYPASS 16 +#define IMXRT1050_CLK_PLL3_BYPASS 17 +#define IMXRT1050_CLK_PLL5_BYPASS 19 +#define IMXRT1050_CLK_PLL1_REF_SEL 20 +#define IMXRT1050_CLK_PLL2_REF_SEL 21 +#define IMXRT1050_CLK_PLL3_REF_SEL 22 +#define IMXRT1050_CLK_PLL5_REF_SEL 23 +#define IMXRT1050_CLK_PRE_PERIPH_SEL 24 +#define IMXRT1050_CLK_PERIPH_SEL 25 +#define IMXRT1050_CLK_SEMC_ALT_SEL 26 +#define IMXRT1050_CLK_SEMC_SEL 27 +#define IMXRT1050_CLK_USDHC1_SEL 28 +#define IMXRT1050_CLK_USDHC2_SEL 29 +#define IMXRT1050_CLK_LPUART_SEL 30 +#define IMXRT1050_CLK_LCDIF_SEL 31 +#define IMXRT1050_CLK_VIDEO_POST_DIV_SEL 32 +#define IMXRT1050_CLK_VIDEO_DIV 33 +#define IMXRT1050_CLK_ARM_PODF 34 +#define IMXRT1050_CLK_LPUART_PODF 35 +#define IMXRT1050_CLK_USDHC1_PODF 36 +#define IMXRT1050_CLK_USDHC2_PODF 37 +#define IMXRT1050_CLK_SEMC_PODF 38 +#define IMXRT1050_CLK_AHB_PODF 39 +#define IMXRT1050_CLK_LCDIF_PRED 40 +#define IMXRT1050_CLK_LCDIF_PODF 41 +#define IMXRT1050_CLK_USDHC1 42 +#define IMXRT1050_CLK_USDHC2 43 +#define IMXRT1050_CLK_LPUART1 44 +#define IMXRT1050_CLK_SEMC 45 +#define IMXRT1050_CLK_LCDIF_APB 46 +#define IMXRT1050_CLK_PLL1_ARM 47 +#define IMXRT1050_CLK_PLL2_SYS 48 +#define IMXRT1050_CLK_PLL3_USB_OTG 49 +#define IMXRT1050_CLK_PLL4_AUDIO 50 +#define IMXRT1050_CLK_PLL5_VIDEO 51 +#define IMXRT1050_CLK_PLL6_ENET 52 +#define IMXRT1050_CLK_PLL7_USB_HOST 53 +#define IMXRT1050_CLK_LCDIF_PIX 54 +#define IMXRT1050_CLK_USBOH3 55 +#define IMXRT1050_CLK_IPG_PDOF 56 +#define IMXRT1050_CLK_PER_CLK_SEL 57 +#define IMXRT1050_CLK_PER_PDOF 58 +#define IMXRT1050_CLK_DMA 59 +#define IMXRT1050_CLK_DMA_MUX 60 +#define IMXRT1050_CLK_END 61 + +#endif /* __DT_BINDINGS_CLOCK_IMXRT1050_H */ -- cgit v1.2.3 From 4f0e30407ef6f2075b8e86d54be42e787087cd61 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 28 Jan 2022 08:06:54 -0800 Subject: ipv4: drop fragmentation code from ip_options_build() Since v2.5.44 and addition of ip_options_fragment() ip_options_build() does not render headers for fragments directly. 
@is_frag is always 0. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index b51bae43b0dd..fdbab0c00fca 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -712,7 +712,7 @@ int ip_forward(struct sk_buff *skb); */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag); + __be32 daddr, struct rtable *rt); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); -- cgit v1.2.3 From 1713a8de4a3bf7d7d2d5759c895e486885fd1cc3 Mon Sep 17 00:00:00 2001 From: Qianggui Song Date: Thu, 13 Jan 2022 11:10:42 +0800 Subject: dt-bindings: gpio: Add a header file for Amlogic Meson S4 Add a new dt-binding header file which documents the detail pin names. Reviewed-by: Martin Blumenstingl Acked-by: Rob Herring Signed-off-by: Qianggui Song Link: https://lore.kernel.org/r/20220113031044.2665-3-qianggui.song@amlogic.com Signed-off-by: Linus Walleij --- include/dt-bindings/gpio/meson-s4-gpio.h | 99 ++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/dt-bindings/gpio/meson-s4-gpio.h (limited to 'include') diff --git a/include/dt-bindings/gpio/meson-s4-gpio.h b/include/dt-bindings/gpio/meson-s4-gpio.h new file mode 100644 index 000000000000..35aee21b94f1 --- /dev/null +++ b/include/dt-bindings/gpio/meson-s4-gpio.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ +/* + * Copyright (c) 2021 Amlogic, Inc. All rights reserved. + * Author: Qianggui Song + */ + +#ifndef _DT_BINDINGS_MESON_S4_GPIO_H +#define _DT_BINDINGS_MESON_S4_GPIO_H + +#define GPIOB_0 0 +#define GPIOB_1 1 +#define GPIOB_2 2 +#define GPIOB_3 3 +#define GPIOB_4 4 +#define GPIOB_5 5 +#define GPIOB_6 6 +#define GPIOB_7 7 +#define GPIOB_8 8 +#define GPIOB_9 9 +#define GPIOB_10 10 +#define GPIOB_11 11 +#define GPIOB_12 12 +#define GPIOB_13 13 + +#define GPIOC_0 14 +#define GPIOC_1 15 +#define GPIOC_2 16 +#define GPIOC_3 17 +#define GPIOC_4 18 +#define GPIOC_5 19 +#define GPIOC_6 20 +#define GPIOC_7 21 + +#define GPIOE_0 22 +#define GPIOE_1 23 + +#define GPIOD_0 24 +#define GPIOD_1 25 +#define GPIOD_2 26 +#define GPIOD_3 27 +#define GPIOD_4 28 +#define GPIOD_5 29 +#define GPIOD_6 30 +#define GPIOD_7 31 +#define GPIOD_8 32 +#define GPIOD_9 33 +#define GPIOD_10 34 +#define GPIOD_11 35 + +#define GPIOH_0 36 +#define GPIOH_1 37 +#define GPIOH_2 38 +#define GPIOH_3 39 +#define GPIOH_4 40 +#define GPIOH_5 41 +#define GPIOH_6 42 +#define GPIOH_7 43 +#define GPIOH_8 44 +#define GPIOH_9 45 +#define GPIOH_10 46 +#define GPIOH_11 47 + +#define GPIOX_0 48 +#define GPIOX_1 49 +#define GPIOX_2 50 +#define GPIOX_3 51 +#define GPIOX_4 52 +#define GPIOX_5 53 +#define GPIOX_6 54 +#define GPIOX_7 55 +#define GPIOX_8 56 +#define GPIOX_9 57 +#define GPIOX_10 58 +#define GPIOX_11 59 +#define GPIOX_12 60 +#define GPIOX_13 61 +#define GPIOX_14 62 +#define GPIOX_15 63 +#define GPIOX_16 64 +#define GPIOX_17 65 +#define GPIOX_18 66 +#define GPIOX_19 67 + +#define GPIOZ_0 68 +#define GPIOZ_1 69 +#define GPIOZ_2 70 +#define GPIOZ_3 71 +#define GPIOZ_4 72 +#define GPIOZ_5 73 +#define GPIOZ_6 74 +#define GPIOZ_7 75 +#define GPIOZ_8 76 +#define GPIOZ_9 77 +#define GPIOZ_10 78 +#define GPIOZ_11 79 +#define GPIOZ_12 80 + +#define GPIO_TEST_N 81 +#endif /* _DT_BINDINGS_MESON_S4_GPIO_H */ -- cgit v1.2.3 From 340407d214e4566c8a5d6997b7ca5ec21c7077a8 Mon Sep 17 
00:00:00 2001 From: Wells Lu Date: Sun, 16 Jan 2022 22:52:13 +0800 Subject: dt-bindings: pinctrl: Add dt-bindings for Sunplus SP7021 Add dt-bindings header files and documentation for Sunplus SP7021 SoC. Signed-off-by: Wells Lu Link: https://lore.kernel.org/r/1642344734-27229-2-git-send-email-wellslutw@gmail.com Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/sppctl-sp7021.h | 179 ++++++++++++++++++++++++++++ include/dt-bindings/pinctrl/sppctl.h | 31 +++++ 2 files changed, 210 insertions(+) create mode 100644 include/dt-bindings/pinctrl/sppctl-sp7021.h create mode 100644 include/dt-bindings/pinctrl/sppctl.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/sppctl-sp7021.h b/include/dt-bindings/pinctrl/sppctl-sp7021.h new file mode 100644 index 000000000000..629aa9b5ffbc --- /dev/null +++ b/include/dt-bindings/pinctrl/sppctl-sp7021.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Sunplus SP7021 dt-bindings Pinctrl header file + * Copyright (C) Sunplus Tech/Tibbo Tech. + * Author: Dvorkin Dmitry + */ + +#ifndef __DT_BINDINGS_PINCTRL_SPPCTL_SP7021_H__ +#define __DT_BINDINGS_PINCTRL_SPPCTL_SP7021_H__ + +#include + +/* + * Please don't change the order of the following defines. + * They are based on order of 'hardware' control register + * defined in MOON2 ~ MOON3 registers. + */ +#define MUXF_GPIO 0 +#define MUXF_IOP 1 +#define MUXF_L2SW_CLK_OUT 2 +#define MUXF_L2SW_MAC_SMI_MDC 3 +#define MUXF_L2SW_LED_FLASH0 4 +#define MUXF_L2SW_LED_FLASH1 5 +#define MUXF_L2SW_LED_ON0 6 +#define MUXF_L2SW_LED_ON1 7 +#define MUXF_L2SW_MAC_SMI_MDIO 8 +#define MUXF_L2SW_P0_MAC_RMII_TXEN 9 +#define MUXF_L2SW_P0_MAC_RMII_TXD0 10 +#define MUXF_L2SW_P0_MAC_RMII_TXD1 11 +#define MUXF_L2SW_P0_MAC_RMII_CRSDV 12 +#define MUXF_L2SW_P0_MAC_RMII_RXD0 13 +#define MUXF_L2SW_P0_MAC_RMII_RXD1 14 +#define MUXF_L2SW_P0_MAC_RMII_RXER 15 +#define MUXF_L2SW_P1_MAC_RMII_TXEN 16 +#define MUXF_L2SW_P1_MAC_RMII_TXD0 17 +#define MUXF_L2SW_P1_MAC_RMII_TXD1 18 +#define MUXF_L2SW_P1_MAC_RMII_CRSDV 19 +#define MUXF_L2SW_P1_MAC_RMII_RXD0 20 +#define MUXF_L2SW_P1_MAC_RMII_RXD1 21 +#define MUXF_L2SW_P1_MAC_RMII_RXER 22 +#define MUXF_DAISY_MODE 23 +#define MUXF_SDIO_CLK 24 +#define MUXF_SDIO_CMD 25 +#define MUXF_SDIO_D0 26 +#define MUXF_SDIO_D1 27 +#define MUXF_SDIO_D2 28 +#define MUXF_SDIO_D3 29 +#define MUXF_PWM0 30 +#define MUXF_PWM1 31 +#define MUXF_PWM2 32 +#define MUXF_PWM3 33 +#define MUXF_PWM4 34 +#define MUXF_PWM5 35 +#define MUXF_PWM6 36 +#define MUXF_PWM7 37 +#define MUXF_ICM0_D 38 +#define MUXF_ICM1_D 39 +#define MUXF_ICM2_D 40 +#define MUXF_ICM3_D 41 +#define MUXF_ICM0_CLK 42 +#define MUXF_ICM1_CLK 43 +#define MUXF_ICM2_CLK 44 +#define MUXF_ICM3_CLK 45 +#define MUXF_SPIM0_INT 46 +#define MUXF_SPIM0_CLK 47 +#define MUXF_SPIM0_EN 48 +#define MUXF_SPIM0_DO 49 +#define MUXF_SPIM0_DI 50 +#define MUXF_SPIM1_INT 51 +#define MUXF_SPIM1_CLK 52 +#define MUXF_SPIM1_EN 53 +#define MUXF_SPIM1_DO 54 +#define MUXF_SPIM1_DI 55 +#define MUXF_SPIM2_INT 56 +#define MUXF_SPIM2_CLK 57 +#define MUXF_SPIM2_EN 58 +#define MUXF_SPIM2_DO 59 +#define MUXF_SPIM2_DI 60 +#define MUXF_SPIM3_INT 61 +#define MUXF_SPIM3_CLK 62 +#define MUXF_SPIM3_EN 63 +#define MUXF_SPIM3_DO 64 +#define MUXF_SPIM3_DI 65 +#define MUXF_SPI0S_INT 66 +#define MUXF_SPI0S_CLK 67 +#define MUXF_SPI0S_EN 68 +#define MUXF_SPI0S_DO 69 +#define MUXF_SPI0S_DI 70 +#define MUXF_SPI1S_INT 71 +#define MUXF_SPI1S_CLK 72 +#define MUXF_SPI1S_EN 73 +#define MUXF_SPI1S_DO 74 +#define MUXF_SPI1S_DI 75 +#define MUXF_SPI2S_INT 76 +#define 
MUXF_SPI2S_CLK 77 +#define MUXF_SPI2S_EN 78 +#define MUXF_SPI2S_DO 79 +#define MUXF_SPI2S_DI 80 +#define MUXF_SPI3S_INT 81 +#define MUXF_SPI3S_CLK 82 +#define MUXF_SPI3S_EN 83 +#define MUXF_SPI3S_DO 84 +#define MUXF_SPI3S_DI 85 +#define MUXF_I2CM0_CLK 86 +#define MUXF_I2CM0_DAT 87 +#define MUXF_I2CM1_CLK 88 +#define MUXF_I2CM1_DAT 89 +#define MUXF_I2CM2_CLK 90 +#define MUXF_I2CM2_DAT 91 +#define MUXF_I2CM3_CLK 92 +#define MUXF_I2CM3_DAT 93 +#define MUXF_UA1_TX 94 +#define MUXF_UA1_RX 95 +#define MUXF_UA1_CTS 96 +#define MUXF_UA1_RTS 97 +#define MUXF_UA2_TX 98 +#define MUXF_UA2_RX 99 +#define MUXF_UA2_CTS 100 +#define MUXF_UA2_RTS 101 +#define MUXF_UA3_TX 102 +#define MUXF_UA3_RX 103 +#define MUXF_UA3_CTS 104 +#define MUXF_UA3_RTS 105 +#define MUXF_UA4_TX 106 +#define MUXF_UA4_RX 107 +#define MUXF_UA4_CTS 108 +#define MUXF_UA4_RTS 109 +#define MUXF_TIMER0_INT 110 +#define MUXF_TIMER1_INT 111 +#define MUXF_TIMER2_INT 112 +#define MUXF_TIMER3_INT 113 +#define MUXF_GPIO_INT0 114 +#define MUXF_GPIO_INT1 115 +#define MUXF_GPIO_INT2 116 +#define MUXF_GPIO_INT3 117 +#define MUXF_GPIO_INT4 118 +#define MUXF_GPIO_INT5 119 +#define MUXF_GPIO_INT6 120 +#define MUXF_GPIO_INT7 121 + +/* + * Please don't change the order of the following defines. + * They are based on order of items in array 'sppctl_list_funcs' + * in Sunplus pinctrl driver. + */ +#define GROP_SPI_FLASH 122 +#define GROP_SPI_FLASH_4BIT 123 +#define GROP_SPI_NAND 124 +#define GROP_CARD0_EMMC 125 +#define GROP_SD_CARD 126 +#define GROP_UA0 127 +#define GROP_ACHIP_DEBUG 128 +#define GROP_ACHIP_UA2AXI 129 +#define GROP_FPGA_IFX 130 +#define GROP_HDMI_TX 131 +#define GROP_AUD_EXT_ADC_IFX0 132 +#define GROP_AUD_EXT_DAC_IFX0 133 +#define GROP_SPDIF_RX 134 +#define GROP_SPDIF_TX 135 +#define GROP_TDMTX_IFX0 136 +#define GROP_TDMRX_IFX0 137 +#define GROP_PDMRX_IFX0 138 +#define GROP_PCM_IEC_TX 139 +#define GROP_LCDIF 140 +#define GROP_DVD_DSP_DEBUG 141 +#define GROP_I2C_DEBUG 142 +#define GROP_I2C_SLAVE 143 +#define GROP_WAKEUP 144 +#define GROP_UART2AXI 145 +#define GROP_USB0_I2C 146 +#define GROP_USB1_I2C 147 +#define GROP_USB0_OTG 148 +#define GROP_USB1_OTG 149 +#define GROP_UPHY0_DEBUG 150 +#define GROP_UPHY1_DEBUG 151 +#define GROP_UPHY0_EXT 152 +#define GROP_PROBE_PORT 153 + +#endif diff --git a/include/dt-bindings/pinctrl/sppctl.h b/include/dt-bindings/pinctrl/sppctl.h new file mode 100644 index 000000000000..50557265dbfc --- /dev/null +++ b/include/dt-bindings/pinctrl/sppctl.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Sunplus dt-bindings Pinctrl header file + * Copyright (C) Sunplus Tech / Tibbo Tech. 
+ * Author: Dvorkin Dmitry + */ + +#ifndef __DT_BINDINGS_PINCTRL_SPPCTL_H__ +#define __DT_BINDINGS_PINCTRL_SPPCTL_H__ + +#define IOP_G_MASTE (0x01 << 0) +#define IOP_G_FIRST (0x01 << 1) + +#define SPPCTL_PCTL_G_PMUX (0x00 | IOP_G_MASTE) +#define SPPCTL_PCTL_G_GPIO (IOP_G_FIRST | IOP_G_MASTE) +#define SPPCTL_PCTL_G_IOPP (IOP_G_FIRST | 0x00) + +#define SPPCTL_PCTL_L_OUT (0x01 << 0) /* Output LOW */ +#define SPPCTL_PCTL_L_OU1 (0x01 << 1) /* Output HIGH */ +#define SPPCTL_PCTL_L_INV (0x01 << 2) /* Input Invert */ +#define SPPCTL_PCTL_L_ONV (0x01 << 3) /* Output Invert */ +#define SPPCTL_PCTL_L_ODR (0x01 << 4) /* Output Open Drain */ + +/* + * pack into 32-bit value: + * pin# (8bit), typ (8bit), function (8bit), flag (8bit) + */ +#define SPPCTL_IOPAD(pin, typ, fun, flg) (((pin) << 24) | ((typ) << 16) | \ + ((fun) << 8) | (flg)) + +#endif -- cgit v1.2.3 From e820a33748b5e22cecafddf919a7d8679949deb1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Jan 2022 15:53:50 +0200 Subject: math.h: Introduce data types for fractional numbers Introduce a macro to produce data types like struct TYPE_fract { __TYPE numerator; __TYPE denominator; }; to be used in the code wherever it's needed. In the following changes convert some users to it. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220126135353.24007-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/math.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/math.h b/include/linux/math.h index 53674a327e39..439b8f0b9ebd 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MATH_H #define _LINUX_MATH_H +#include #include #include @@ -106,6 +107,17 @@ } \ ) +#define __STRUCT_FRACT(type) \ +struct type##_fract { \ + __##type numerator; \ + __##type denominator; \ +}; +__STRUCT_FRACT(s16) +__STRUCT_FRACT(u16) +__STRUCT_FRACT(s32) +__STRUCT_FRACT(u32) +#undef __STRUCT_FRACT + /* * Multiplies an integer by a fraction, while avoiding unnecessary * overflow or loss of precision. -- cgit v1.2.3 From a5e9b2ddbbc789f34be2333263232435d8edf57c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Jan 2022 15:53:53 +0200 Subject: iio: adc: qcom-vadc-common: Re-use generic struct u32_fract Instead of custom data type re-use generic struct u32_fract. No changes intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220126135353.24007-4-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/qcom-vadc-common.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/iio/adc/qcom-vadc-common.h b/include/linux/iio/adc/qcom-vadc-common.h index 33f60f43e1aa..ce78d4804994 100644 --- a/include/linux/iio/adc/qcom-vadc-common.h +++ b/include/linux/iio/adc/qcom-vadc-common.h @@ -6,6 +6,7 @@ #ifndef QCOM_VADC_COMMON_H #define QCOM_VADC_COMMON_H +#include #include #define VADC_CONV_TIME_MIN_US 2000 @@ -79,16 +80,6 @@ struct vadc_linear_graph { s32 gnd; }; -/** - * struct vadc_prescale_ratio - Represent scaling ratio for ADC input. - * @num: the inverse numerator of the gain applied to the input channel. - * @den: the inverse denominator of the gain applied to the input channel. - */ -struct vadc_prescale_ratio { - u32 num; - u32 den; -}; - /** * enum vadc_scale_fn_type - Scaling function to convert ADC code to * physical scaled units for the channel. 
@@ -144,12 +135,12 @@ struct adc5_data { int qcom_vadc_scale(enum vadc_scale_fn_type scaletype, const struct vadc_linear_graph *calib_graph, - const struct vadc_prescale_ratio *prescale, + const struct u32_fract *prescale, bool absolute, u16 adc_code, int *result_mdec); struct qcom_adc5_scale_type { - int (*scale_fn)(const struct vadc_prescale_ratio *prescale, + int (*scale_fn)(const struct u32_fract *prescale, const struct adc5_data *data, u16 adc_code, int *result); }; -- cgit v1.2.3 From 73c105ad2a3e142d81fc59761ce8353d0b211b8f Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Fri, 28 Jan 2022 21:32:40 +0300 Subject: phy: make phy_set_max_speed() *void* After following the call tree of phy_set_max_speed(), it became clear that this function never returns anything but 0, so we can change its result type to *void* and drop the result checks from the three drivers that actually bothered to do it... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Signed-off-by: Sergey Shtylyov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6de8d7a90d78..cd08cf1a8b0d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1661,7 +1661,7 @@ int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); +void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); -- cgit v1.2.3 From 47ed9442b2ecfcdc72889667236d6c59b6a3337e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 28 Jan 2022 16:53:47 -0700 Subject: ipv4: Make ip_idents_reserve static ip_idents_reserve is only used in net/ipv4/route.c. Make it static and remove the export. Signed-off-by: David Ahern Cc: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index fdbab0c00fca..3984f2c39c4b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -517,7 +517,6 @@ void ip_dst_metrics_put(struct dst_entry *dst) kfree(p); } -u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, -- cgit v1.2.3 From 13e906e50a8cf6033f22c03c4d772e36a9e02c6b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 27 Jan 2022 12:01:07 -0800 Subject: component: Replace most references to 'master' with 'aggregate device' Remove most references to 'master' in the code and replace them with some form of 'aggregate device'. This better reflects the reality of what this code does, i.e. an aggregate device that represents a device like a GPU card once some set of devices that make up the aggregate device probe and register with the component framework. Cc: Daniel Vetter Cc: Greg Kroah-Hartman Cc: Laurent Pinchart Cc: "Rafael J. 
Wysocki" Cc: Rob Clark Cc: Russell King Cc: Saravana Kannan Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20220127200141.1295328-2-swboyd@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/component.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/component.h b/include/linux/component.h index 16de18f473d7..7012569c6546 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -38,10 +38,10 @@ int component_add_typed(struct device *dev, const struct component_ops *ops, int subcomponent); void component_del(struct device *, const struct component_ops *); -int component_bind_all(struct device *master, void *master_data); -void component_unbind_all(struct device *master, void *master_data); +int component_bind_all(struct device *parent, void *data); +void component_unbind_all(struct device *parent, void *data); -struct master; +struct aggregate_device; /** * struct component_master_ops - callback for the aggregate driver @@ -89,22 +89,22 @@ struct component_match; int component_master_add_with_match(struct device *, const struct component_master_ops *, struct component_match *); -void component_match_add_release(struct device *master, +void component_match_add_release(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), void *compare_data); -void component_match_add_typed(struct device *master, +void component_match_add_typed(struct device *parent, struct component_match **matchptr, int (*compare_typed)(struct device *, int, void *), void *compare_data); /** * component_match_add - add a component match entry - * @master: device with the aggregate driver + * @parent: device with the aggregate driver * @matchptr: pointer to the list of component matches * @compare: compare function to match against all components * @compare_data: opaque pointer passed to the @compare function * - * Adds a new component match to the list stored in @matchptr, which the @master + * Adds a new component match to the list stored in @matchptr, which the @parent * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add(). @@ -114,11 +114,11 @@ void component_match_add_typed(struct device *master, * * See also component_match_add_release() and component_match_add_typed(). */ -static inline void component_match_add(struct device *master, +static inline void component_match_add(struct device *parent, struct component_match **matchptr, int (*compare)(struct device *, void *), void *compare_data) { - component_match_add_release(master, matchptr, NULL, compare, + component_match_add_release(parent, matchptr, NULL, compare, compare_data); } -- cgit v1.2.3 From f6c6804c43fa18d3cee64b55490dfbd3bef1363a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 28 Jan 2022 15:40:25 +0000 Subject: kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h This way we can more easily find the next free IOCTL number when adding new IOCTLs. 
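For reference, a userspace sketch of how this ioctl is meant to be called (assuming, as in the xstate series, that KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) reports the required buffer size; error handling trimmed):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd and vcpu_fd are already-open KVM file descriptors. */
static struct kvm_xsave *example_get_xsave2(int vm_fd, int vcpu_fd)
{
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
	struct kvm_xsave *xs;

	if (size < (int)sizeof(*xs))
		return NULL;	/* capability absent: fall back to plain KVM_GET_XSAVE */

	xs = calloc(1, size);
	if (xs && ioctl(vcpu_fd, KVM_GET_XSAVE2, xs) < 0) {
		free(xs);
		return NULL;
	}
	return xs;
}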
Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") Signed-off-by: Janosch Frank Message-Id: <20220128154025.102666-1-frankja@linux.ibm.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b46bcdb0cab1..5191b57e1562 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1624,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2048,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From e187013abeb4c2a7ec8a4bb978844c7e92a1a6ec Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:21 +0300 Subject: txhash: Make rethinking txhash behavior configurable via sysctl Add a per ns sysctl that controls the txhash rethink behavior: net.core.txrehash. When enabled, the same behavior is retained, when disabled, rethink is not performed. Sysctl is enabled by default. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/core.h | 1 + include/net/sock.h | 34 +++++++++++++++++++++------------- include/uapi/linux/socket.h | 3 +++ 3 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 552bc25b1933..388244e315e7 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -10,6 +10,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/sock.h b/include/net/sock.h index ff9b508d9c5f..0540e1b2aeb0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -587,6 +587,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) __tmp | SK_USER_DATA_NOCOPY); \ }) +static inline +struct net *sock_net(const struct sock *sk) +{ + return read_pnet(&sk->sk_net); +} + +static inline +void sock_net_set(struct sock *sk, struct net *net) +{ + write_pnet(&sk->sk_net, net); +} + /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. 
SK_FORCE_REUSE @@ -2054,10 +2066,18 @@ static inline void sk_set_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) { + u8 rehash; + + if (!sk->sk_txhash) + return false; + + rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); + + if (rehash) { sk_set_txhash(sk); return true; } + return false; } @@ -2704,18 +2724,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static inline -struct net *sock_net(const struct sock *sk) -{ - return read_pnet(&sk->sk_net); -} - -static inline -void sock_net_set(struct sock *sk, struct net *net) -{ - write_pnet(&sk->sk_net, net); -} - static inline bool skb_sk_is_prefetched(struct sk_buff *skb) { diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index eb0a9a5b6e71..0accd6102ece 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,4 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DISABLED 0 +#define SOCK_TXREHASH_ENABLED 1 + #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 26859240e4ee701e0379f08634957adaff67e43a Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:22 +0300 Subject: txhash: Add socket option to control TX hash rethink behavior Add the SO_TXREHASH socket option to control hash rethink behavior per socket. When default mode is set, sockets disable rehash at initialization and use sysctl option when entering listen state. setsockopt() overrides default behavior. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 12 +++--------- include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/socket.h | 1 + 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 0540e1b2aeb0..d6c13f0fba40 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -316,6 +316,7 @@ struct sk_filter; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_txhash: computed flow hash for use on transmit + * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received @@ -491,6 +492,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + u8 sk_txrehash; #ifdef CONFIG_NET_RX_BUSY_POLL u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; @@ -2066,18 +2068,10 @@ static inline void sk_set_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk) { - u8 rehash; - - if (!sk->sk_txhash) - return false; - - rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - - if (rehash) { + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) { sk_set_txhash(sk); return true; } - return false; } diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c77a1313b3b0..467ca2f28760 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -128,6 +128,8 @@ #define SO_RESERVE_MEM 73 +#define SO_TXREHASH 74 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 0accd6102ece..51d6bb2f6765 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK 
(SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DEFAULT ((u8)-1) #define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_ENABLED 1 -- cgit v1.2.3 From 31455bbda2081af83f72bb4636348b12b82c37c1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Jan 2022 01:58:36 +0100 Subject: spi: pxa2xx_spi: Convert to use GPIO descriptors This converts the PXA2xx SPI driver to use GPIO descriptors exclusively to retrieve GPIO chip select lines. The device tree and ACPI paths of the driver already use descriptors, hence ->use_gpio_descriptors is already set and this codepath is well tested. Convert all the PXA boards providing chip select GPIOs as platform data and drop the old GPIO chipselect handling in favor of the core managing it exclusively. Cc: Marek Vasut Cc: Daniel Mack Cc: Haojian Zhuang Cc: Robert Jarzmik Cc: linux-arm-kernel@lists.infradead.org Acked-by: Jonathan Cameron Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220125005836.494807-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/spi/pxa2xx_spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index ca74dce36706..4658e7801b42 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -42,7 +42,6 @@ struct pxa2xx_spi_chip { u8 rx_threshold; u8 dma_burst_size; u32 timeout; - int gpio_cs; }; #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) -- cgit v1.2.3 From 6419abb80e82c603bbec6d7f5af6c2f79fa5c4ae Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 25 Jan 2022 13:00:09 -0800 Subject: kunit: remove va_format from kunit_assert The concern is that having a lot of redundant fields in kunit_assert can blow up stack usage if the compiler doesn't optimize them away [1]. The comment on this field implies that it was meant to be initialized when the expect/assert was declared, but this only happens when we run kunit_do_failed_assertion(). We don't need to access it outside of that function, so move it out of the struct and make it a local variable there. This change also takes the chance to reduce the number of macros by inlining the now simplified KUNIT_INIT_ASSERT_STRUCT() macro. [1] https://groups.google.com/g/kunit-dev/c/i3fZXgvBrfA/m/VULQg1z6BAAJ Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/assert.h | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index f2b3ae5cc2de..0b3704db54b6 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -42,44 +42,21 @@ struct kunit_loc { /** * struct kunit_assert - Data for printing a failed assertion or expectation. - * @message: an optional message to provide additional context. * @format: a function which formats the data in this kunit_assert to a string. * * Represents a failed expectation/assertion. Contains all the data necessary to * format a string to a user reporting the failure. */ struct kunit_assert { - struct va_format message; void (*format)(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); }; -/** - * KUNIT_INIT_VA_FMT_NULL - Default initializer for struct va_format. - * - * Used inside a struct initialization block to initialize struct va_format to - * default values where fmt and va are null. 
- */ -#define KUNIT_INIT_VA_FMT_NULL { .fmt = NULL, .va = NULL } - -/** - * KUNIT_INIT_ASSERT_STRUCT() - Initializer for a &struct kunit_assert. - * @fmt: The formatting function which builds a string out of this kunit_assert. - * - * The base initializer for a &struct kunit_assert. - */ -#define KUNIT_INIT_ASSERT_STRUCT(fmt) { \ - .message = KUNIT_INIT_VA_FMT_NULL, \ - .format = fmt \ -} - void kunit_assert_prologue(const struct kunit_loc *loc, enum kunit_assert_type type, struct string_stream *stream); -void kunit_assert_print_msg(const struct kunit_assert *assert, - struct string_stream *stream); - /** * struct kunit_fail_assert - Represents a plain fail expectation/assertion. * @assert: The parent of this type. @@ -91,6 +68,7 @@ struct kunit_fail_assert { }; void kunit_fail_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -100,7 +78,7 @@ void kunit_fail_assert_format(const struct kunit_assert *assert, * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_FAIL_ASSERT_STRUCT { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_fail_assert_format) \ + .assert = { .format = kunit_fail_assert_format }, \ } /** @@ -120,6 +98,7 @@ struct kunit_unary_assert { }; void kunit_unary_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -131,7 +110,7 @@ void kunit_unary_assert_format(const struct kunit_assert *assert, * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_UNARY_ASSERT_STRUCT(cond, expect_true) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_unary_assert_format), \ + .assert = { .format = kunit_unary_assert_format }, \ .condition = cond, \ .expected_true = expect_true \ } @@ -153,6 +132,7 @@ struct kunit_ptr_not_err_assert { }; void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -165,7 +145,7 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ #define KUNIT_INIT_PTR_NOT_ERR_STRUCT(txt, val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_ptr_not_err_assert_format), \ + .assert = { .format = kunit_ptr_not_err_assert_format }, \ .text = txt, \ .value = val \ } @@ -194,6 +174,7 @@ struct kunit_binary_assert { }; void kunit_binary_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -213,7 +194,7 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_assert_format), \ + .assert = { .format = kunit_binary_assert_format }, \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ @@ -245,6 +226,7 @@ struct kunit_binary_ptr_assert { }; void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -265,7 +247,7 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_ptr_assert_format), \ + .assert = { .format = kunit_binary_ptr_assert_format }, \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ @@ -297,6 +279,7 @@ struct kunit_binary_str_assert { }; void kunit_binary_str_assert_format(const struct kunit_assert *assert, + const struct va_format *message, struct string_stream *stream); /** @@ -316,7 +299,7 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, left_val, \ right_str, \ right_val) { \ - .assert = KUNIT_INIT_ASSERT_STRUCT(kunit_binary_str_assert_format), \ + .assert = { .format = kunit_binary_str_assert_format }, \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ -- cgit v1.2.3 From 064ff292aca500d6b911dca6abe1ece22620d475 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 25 Jan 2022 13:00:10 -0800 Subject: kunit: consolidate KUNIT_INIT_BINARY_ASSERT_STRUCT macros We currently have 2 other versions of KUNIT_INIT_BINARY_ASSERT_STRUCT. The only differences are that * the format funcition they pass is different * the types of left_val/right_val should be different (integral, pointer, string). The latter doesn't actually matter since these macros are just plumbing them along to KUNIT_ASSERTION where they will get type checked. So combine them all into a single KUNIT_INIT_BINARY_ASSERT_STRUCT that now also takes the format function as a parameter. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/assert.h | 68 ++++++++------------------------------------------ include/kunit/test.h | 20 ++++++++------- 2 files changed, 22 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 0b3704db54b6..649bfac9f406 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -178,23 +178,28 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** - * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a - * &struct kunit_binary_assert. + * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a binary assert like + * kunit_binary_assert, kunit_binary_ptr_assert, etc. + * + * @format_func: a function which formats the assert to a string. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. 
* @left_val: The actual evaluated value of the expression in the left slot. * @right_str: A string representation of the expression in the right slot. * @right_val: The actual evaluated value of the expression in the right slot. * - * Initializes a &struct kunit_binary_assert. Intended to be used in - * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. + * Initializes a binary assert like kunit_binary_assert, + * kunit_binary_ptr_assert, etc. This relies on these structs having the same + * fields but with different types for left_val/right_val. + * This is ultimately used by binary assertion macros like KUNIT_EXPECT_EQ, etc. */ -#define KUNIT_INIT_BINARY_ASSERT_STRUCT(op_str, \ +#define KUNIT_INIT_BINARY_ASSERT_STRUCT(format_func, \ + op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ - .assert = { .format = kunit_binary_assert_format }, \ + .assert = { .format = format_func }, \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ @@ -229,32 +234,6 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, const struct va_format *message, struct string_stream *stream); -/** - * KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT() - Initializes a - * &struct kunit_binary_ptr_assert. - * @type: The type (assertion or expectation) of this kunit_assert. - * @op_str: A string representation of the comparison operator (e.g. "=="). - * @left_str: A string representation of the expression in the left slot. - * @left_val: The actual evaluated value of the expression in the left slot. - * @right_str: A string representation of the expression in the right slot. - * @right_val: The actual evaluated value of the expression in the right slot. - * - * Initializes a &struct kunit_binary_ptr_assert. Intended to be used in - * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. - */ -#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(op_str, \ - left_str, \ - left_val, \ - right_str, \ - right_val) { \ - .assert = { .format = kunit_binary_ptr_assert_format }, \ - .operation = op_str, \ - .left_text = left_str, \ - .left_value = left_val, \ - .right_text = right_str, \ - .right_value = right_val \ -} - /** * struct kunit_binary_str_assert - An expectation/assertion that compares two * string values (for example, KUNIT_EXPECT_STREQ(test, foo, "bar")). @@ -282,29 +261,4 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, const struct va_format *message, struct string_stream *stream); -/** - * KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() - Initializes a - * &struct kunit_binary_str_assert. - * @op_str: A string representation of the comparison operator (e.g. "=="). - * @left_str: A string representation of the expression in the left slot. - * @left_val: The actual evaluated value of the expression in the left slot. - * @right_str: A string representation of the expression in the right slot. - * @right_val: The actual evaluated value of the expression in the right slot. - * - * Initializes a &struct kunit_binary_str_assert. Intended to be used in - * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
- */ -#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(op_str, \ - left_str, \ - left_val, \ - right_str, \ - right_val) { \ - .assert = { .format = kunit_binary_str_assert_format }, \ - .operation = op_str, \ - .left_text = left_str, \ - .left_value = left_val, \ - .right_text = right_str, \ - .right_value = right_val \ -} - #endif /* _KUNIT_ASSERT_H */ diff --git a/include/kunit/test.h b/include/kunit/test.h index bf82c313223b..a93dfb8ff393 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -864,7 +864,7 @@ void kunit_do_failed_assertion(struct kunit *test, */ #define KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ - ASSERT_CLASS_INIT, \ + format_func, \ assert_type, \ left, \ op, \ @@ -879,11 +879,12 @@ do { \ assert_type, \ __left op __right, \ assert_class, \ - ASSERT_CLASS_INIT(#op, \ - #left, \ - __left, \ - #right, \ - __right), \ + KUNIT_INIT_BINARY_ASSERT_STRUCT(format_func, \ + #op, \ + #left, \ + __left, \ + #right, \ + __right), \ fmt, \ ##__VA_ARGS__); \ } while (0) @@ -897,7 +898,7 @@ do { \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ kunit_binary_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT, \ + kunit_binary_assert_format, \ assert_type, \ left, op, right, \ fmt, \ @@ -912,7 +913,7 @@ do { \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ kunit_binary_ptr_assert, \ - KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ + kunit_binary_ptr_assert_format, \ assert_type, \ left, op, right, \ fmt, \ @@ -933,7 +934,8 @@ do { \ assert_type, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(#op, \ + KUNIT_INIT_BINARY_ASSERT_STRUCT(kunit_binary_str_assert_format,\ + #op, \ #left, \ __left, \ #right, \ -- cgit v1.2.3 From 2b6861e2372bac68861c54372f68f6016a7484fc Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 25 Jan 2022 13:00:11 -0800 Subject: kunit: factor out str constants from binary assertion structs If the compiler doesn't optimize them away, each kunit assertion (use of KUNIT_EXPECT_EQ, etc.) can use 88 bytes of stack space in the worst and most common case. This has led to compiler warnings and a suggestion from Linus to move data from the structs into static const's where possible [1]. This builds upon [2] which did so for the base struct kunit_assert type. That only reduced sizeof(struct kunit_binary_assert) from 88 to 64. Given these are by far the most commonly used asserts, this patch factors out the textual representations of the operands and comparator into another static const, saving 16 more bytes. In detail, KUNIT_EXPECT_EQ(test, 2 + 2, 5) yields the following struct (struct kunit_binary_assert) { .assert = , .operation = "==", .left_text = "2 + 2", .left_value = 4, .right_text = "5", .right_value = 5, } After this change static const struct kunit_binary_assert_text __text = { .operation = "==", .left_text = "2 + 2", .right_text = "5", }; (struct kunit_binary_assert) { .assert = , .text = &__text, .left_value = 4, .right_value = 5, } This also DRYs the code a bit more since these str fields were repeated for the string and pointer versions of kunit_binary_assert. Note: we could name the kunit_binary_assert_text fields left/right instead of left_text/right_text. But that would require changing the macros a bit since they have args called "left" and "right" which would be substituted in `.left = #left` as `.2 + 2 = \"2 + 2\"`. 
[1] https://groups.google.com/g/kunit-dev/c/i3fZXgvBrfA/m/VULQg1z6BAAJ [2] https://lore.kernel.org/linux-kselftest/20220113165931.451305-6-dlatypov@google.com/ Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/assert.h | 49 ++++++++++++++++++++++--------------------------- include/kunit/test.h | 20 +++++++++++++------- 2 files changed, 35 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 649bfac9f406..4b52e12c2ae8 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -150,14 +150,25 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, .value = val \ } +/** + * struct kunit_binary_assert_text - holds strings for &struct + * kunit_binary_assert and friends to try and make the structs smaller. + * @operation: A string representation of the comparison operator (e.g. "=="). + * @left_text: A string representation of the left expression (e.g. "2+2"). + * @right_text: A string representation of the right expression (e.g. "2+2"). + */ +struct kunit_binary_assert_text { + const char *operation; + const char *left_text; + const char *right_text; +}; + /** * struct kunit_binary_assert - An expectation/assertion that compares two * non-pointer values (for example, KUNIT_EXPECT_EQ(test, 1 + 1, 2)). * @assert: The parent of this type. - * @operation: A string representation of the comparison operator (e.g. "=="). - * @left_text: A string representation of the expression in the left slot. + * @text: Holds the textual representations of the operands and op (e.g. "=="). * @left_value: The actual evaluated value of the expression in the left slot. - * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two non-pointer values. For @@ -166,10 +177,8 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, */ struct kunit_binary_assert { struct kunit_assert assert; - const char *operation; - const char *left_text; + const struct kunit_binary_assert_text *text; long long left_value; - const char *right_text; long long right_value; }; @@ -182,10 +191,8 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, * kunit_binary_assert, kunit_binary_ptr_assert, etc. * * @format_func: a function which formats the assert to a string. - * @op_str: A string representation of the comparison operator (e.g. "=="). - * @left_str: A string representation of the expression in the left slot. + * @text_: Pointer to a kunit_binary_assert_text. * @left_val: The actual evaluated value of the expression in the left slot. - * @right_str: A string representation of the expression in the right slot. * @right_val: The actual evaluated value of the expression in the right slot. * * Initializes a binary assert like kunit_binary_assert, @@ -194,16 +201,12 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, * This is ultimately used by binary assertion macros like KUNIT_EXPECT_EQ, etc. 
*/ #define KUNIT_INIT_BINARY_ASSERT_STRUCT(format_func, \ - op_str, \ - left_str, \ + text_, \ left_val, \ - right_str, \ right_val) { \ .assert = { .format = format_func }, \ - .operation = op_str, \ - .left_text = left_str, \ + .text = text_, \ .left_value = left_val, \ - .right_text = right_str, \ .right_value = right_val \ } @@ -211,10 +214,8 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, * struct kunit_binary_ptr_assert - An expectation/assertion that compares two * pointer values (for example, KUNIT_EXPECT_PTR_EQ(test, foo, bar)). * @assert: The parent of this type. - * @operation: A string representation of the comparison operator (e.g. "=="). - * @left_text: A string representation of the expression in the left slot. + * @text: Holds the textual representations of the operands and op (e.g. "=="). * @left_value: The actual evaluated value of the expression in the left slot. - * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two pointer values. For @@ -223,10 +224,8 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, */ struct kunit_binary_ptr_assert { struct kunit_assert assert; - const char *operation; - const char *left_text; + const struct kunit_binary_assert_text *text; const void *left_value; - const char *right_text; const void *right_value; }; @@ -238,10 +237,8 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, * struct kunit_binary_str_assert - An expectation/assertion that compares two * string values (for example, KUNIT_EXPECT_STREQ(test, foo, "bar")). * @assert: The parent of this type. - * @operation: A string representation of the comparison operator (e.g. "=="). - * @left_text: A string representation of the expression in the left slot. + * @text: Holds the textual representations of the operands and comparator. * @left_value: The actual evaluated value of the expression in the left slot. - * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two string values. For @@ -250,10 +247,8 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, */ struct kunit_binary_str_assert { struct kunit_assert assert; - const char *operation; - const char *left_text; + const struct kunit_binary_assert_text *text; const char *left_value; - const char *right_text; const char *right_value; }; diff --git a/include/kunit/test.h b/include/kunit/test.h index a93dfb8ff393..088ff394ae94 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -874,16 +874,19 @@ void kunit_do_failed_assertion(struct kunit *test, do { \ typeof(left) __left = (left); \ typeof(right) __right = (right); \ + static const struct kunit_binary_assert_text __text = { \ + .operation = #op, \ + .left_text = #left, \ + .right_text = #right, \ + }; \ \ KUNIT_ASSERTION(test, \ assert_type, \ __left op __right, \ assert_class, \ KUNIT_INIT_BINARY_ASSERT_STRUCT(format_func, \ - #op, \ - #left, \ + &__text, \ __left, \ - #right, \ __right), \ fmt, \ ##__VA_ARGS__); \ @@ -928,17 +931,20 @@ do { \ ...) 
\ do { \ const char *__left = (left); \ - const char *__right = (right); \ + const char *__right = (right); \ + static const struct kunit_binary_assert_text __text = { \ + .operation = #op, \ + .left_text = #left, \ + .right_text = #right, \ + }; \ \ KUNIT_ASSERTION(test, \ assert_type, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT(kunit_binary_str_assert_format,\ - #op, \ - #left, \ + &__text, \ __left, \ - #right, \ __right), \ fmt, \ ##__VA_ARGS__); \ -- cgit v1.2.3 From c2741453478badf571ef020d160053e8d5e1ba94 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 27 Jan 2022 13:52:22 -0800 Subject: kunit: cleanup assertion macro internal variables All the operands should be tagged `const`. We're only assigning them to variables so that we can compare them (e.g. check if left == right, etc.) and avoid evaluating expressions multiple times. There's no need for them to be mutable. Also rename the helper variable `loc` to `__loc` like we do with `__assertion` and `__strs` to avoid potential name collisions with user code. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 088ff394ae94..00b9ff7783ab 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -779,10 +779,10 @@ void kunit_do_failed_assertion(struct kunit *test, #define KUNIT_ASSERTION(test, assert_type, pass, assert_class, INITIALIZER, fmt, ...) do { \ if (unlikely(!(pass))) { \ - static const struct kunit_loc loc = KUNIT_CURRENT_LOC; \ + static const struct kunit_loc __loc = KUNIT_CURRENT_LOC; \ struct assert_class __assertion = INITIALIZER; \ kunit_do_failed_assertion(test, \ - &loc, \ + &__loc, \ assert_type, \ &__assertion.assert, \ fmt, \ @@ -872,8 +872,8 @@ void kunit_do_failed_assertion(struct kunit *test, fmt, \ ...) \ do { \ - typeof(left) __left = (left); \ - typeof(right) __right = (right); \ + const typeof(left) __left = (left); \ + const typeof(right) __right = (right); \ static const struct kunit_binary_assert_text __text = { \ .operation = #op, \ .left_text = #left, \ @@ -956,7 +956,7 @@ do { \ fmt, \ ...) \ do { \ - typeof(ptr) __ptr = (ptr); \ + const typeof(ptr) __ptr = (ptr); \ \ KUNIT_ASSERTION(test, \ assert_type, \ -- cgit v1.2.3 From 4421a582718ab81608d8486734c18083b822390d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 30 Jan 2022 12:55:17 +0100 Subject: bpf: Make dst_port field in struct bpf_sock 16-bit wide Menglong Dong reports that the documentation for the dst_port field in struct bpf_sock is inaccurate and confusing. From the BPF program PoV, the field is a zero-padded 16-bit integer in network byte order. The value appears to the BPF user as if laid out in memory as so: offsetof(struct bpf_sock, dst_port) + 0 + 8 +16 0x00 +24 0x00 32-, 16-, and 8-bit wide loads from the field are all allowed, but only if the offset into the field is 0. 32-bit wide loads from dst_port are especially confusing. The loaded value, after converting to host byte order with bpf_ntohl(dst_port), contains the port number in the upper 16-bits. Remove the confusion by splitting the field into two 16-bit fields. For backward compatibility, allow 32-bit wide loads from offsetof(struct bpf_sock, dst_port). While at it, allow loads 8-bit loads at offset [0] and [1] from dst_port. 
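Illustrative sketch (not part of this patch) of what the new layout permits from BPF; the program type, attach point, section name and helper usage below are only an assumed example of a context where a full struct bpf_sock is reachable: #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; SEC("cgroup_skb/egress") int log_https(struct __sk_buff *skb) { struct bpf_sock *sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; /* plain 16-bit load; the value is in network byte order */ if (bpf_ntohs(sk->dst_port) == 443) bpf_printk("egress to port 443"); return 1; /* allow the packet */ }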
Reported-by: Menglong Dong Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220130115518.213259-2-jakub@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a2f7041ebae..a7f0ddedac1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5574,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; -- cgit v1.2.3 From 943515090ec67f81f6f93febfddb8c9118357e97 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 8 Dec 2021 09:34:22 +0100 Subject: firmware: qcom: scm: Add function to set the maximum IOMMU pool size This is not necessary for basic functionality of the IOMMU, but it's an optimization that tells to the TZ what's the maximum mappable size for the secure IOMMUs, so that it can optimize the data structures in the TZ itself. Signed-off-by: AngeloGioacchino Del Regno [Marijn: ported from 5.3 to the unified architecture in 5.11] Signed-off-by: Marijn Suijten Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211208083423.22037-3-marijn.suijten@somainline.org --- include/linux/qcom_scm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 81cad9e1e412..8a065f8660c1 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -83,6 +83,7 @@ extern bool qcom_scm_restore_sec_cfg_available(void); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, u32 cp_nonpixel_start, u32 cp_nonpixel_size); -- cgit v1.2.3 From 071a13332de894cb3c38b17c82350f1e4167c023 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 8 Dec 2021 09:34:23 +0100 Subject: firmware: qcom: scm: Add function to set IOMMU pagetable addressing Add a function to change the IOMMU pagetable addressing to AArch32 LPAE or AArch64. If doing that, then this must be done for each IOMMU context (not necessarily at the same time). 
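Illustrative sketch (not part of this patch) of how a secure IOMMU driver might combine the two new calls; the sec_id, context count, pool size and pt_fmt value are platform specific and only assumed here: static int example_secure_iommu_setup(u32 sec_id, u32 num_ctx, u32 max_size) { u32 ctx; int ret; /* optional optimization: hint the maximum mappable size to TZ (spare = 0) */ ret = qcom_scm_iommu_set_cp_pool_size(0, max_size); if (ret) return ret; /* switch every secure context to the wider pagetable addressing; the pt_fmt value for AArch64 is TZ-defined, 1 is only a placeholder */ for (ctx = 0; ctx < num_ctx; ctx++) { ret = qcom_scm_iommu_set_pt_format(sec_id, ctx, 1); if (ret) return ret; } return 0; }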
Signed-off-by: AngeloGioacchino Del Regno [Marijn: ported from 5.3 to the unified architecture in 5.11] Signed-off-by: Marijn Suijten Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211208083423.22037-4-marijn.suijten@somainline.org --- include/linux/qcom_scm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 8a065f8660c1..ca4a88d7cbdc 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -108,6 +108,7 @@ extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); +extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, -- cgit v1.2.3 From 6d3ac94bae21de6b6a1d81596752428960012816 Mon Sep 17 00:00:00 2001 From: Yang Guang Date: Fri, 14 Jan 2022 08:11:02 +0800 Subject: ssb: fix boolreturn.cocci warning The coccinelle report ./include/linux/ssb/ssb_driver_gige.h:98:8-9: WARNING: return of 0/1 in function 'ssb_gige_must_flush_posted_writes' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Reported-by: Zeal Robot Signed-off-by: Yang Guang Signed-off-by: David Yang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/fa4f1fa737e715eb62a85229ac5f12bae21145cf.1642065490.git.davidcomponentone@gmail.com --- include/linux/ssb/ssb_driver_gige.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 15ba0df1ee0d..28c145a51e57 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -95,7 +95,7 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) struct ssb_gige *dev = pdev_to_ssb_gige(pdev); if (dev) return (dev->dev->bus->chip_id == 0x4785); - return 0; + return false; } /* Get the device MAC address */ -- cgit v1.2.3 From ef9989afda73332df566852d6e9ca695c05f10ce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 1 Feb 2022 13:29:22 +0000 Subject: kvm: add guest_state_{enter,exit}_irqoff() When transitioning to/from guest mode, it is necessary to inform lockdep, tracing, and RCU in a specific order, similar to the requirements for transitions to/from user mode. Additionally, it is necessary to perform vtime accounting for a window around running the guest, with RCU enabled, such that timer interrupts taken from the guest can be accounted as guest time. Most architectures don't handle all the necessary pieces, and a have a number of common bugs, including unsafe usage of RCU during the window between guest_enter() and guest_exit(). On x86, this was dealt with across commits: 87fa7f3e98a1310e ("x86/kvm: Move context tracking where it belongs") 0642391e2139a2c1 ("x86/kvm/vmx: Add hardirq tracing to guest enter/exit") 9fc975e9efd03e57 ("x86/kvm/svm: Add hardirq tracing on guest enter/exit") 3ebccdf373c21d86 ("x86/kvm/vmx: Move guest enter/exit into .noinstr.text") 135961e0a7d555fc ("x86/kvm/svm: Move guest enter/exit into .noinstr.text") 160457140187c5fb ("KVM: x86: Defer vtime accounting 'til after IRQ handling") bc908e091b326467 ("KVM: x86: Consolidate guest enter/exit logic to common helpers") ... 
but those fixes are specific to x86, and as the resulting logic (while correct) is split across generic helper functions and x86-specific helper functions, it is difficult to see that the entry/exit accounting is balanced. This patch adds generic helpers which architectures can use to handle guest entry/exit consistently and correctly. The guest_{enter,exit}() helpers are split into guest_timing_{enter,exit}() to perform vtime accounting, and guest_context_{enter,exit}() to perform the necessary context tracking and RCU management. The existing guest_{enter,exit}() heleprs are left as wrappers of these. Atop this, new guest_state_enter_irqoff() and guest_state_exit_irqoff() helpers are added to handle the ordering of lockdep, tracing, and RCU manageent. These are inteneded to mirror exit_to_user_mode() and enter_from_user_mode(). Subsequent patches will migrate architectures over to the new helpers, following a sequence: guest_timing_enter_irqoff(); guest_state_enter_irqoff(); < run the vcpu > guest_state_exit_irqoff(); < take any pending IRQs > guest_timing_exit_irqoff(); This sequences handles all of the above correctly, and more clearly balances the entry and exit portions, making it easier to understand. The existing helpers are marked as deprecated, and will be removed once all architectures have been converted. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Marc Zyngier Reviewed-by: Paolo Bonzini Reviewed-by: Nicolas Saenz Julienne Message-Id: <20220201132926.3301912-2-mark.rutland@arm.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 112 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f079820f52b5..b3810976a27f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,7 +29,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -368,8 +370,11 @@ struct kvm_vcpu { u64 last_used_slot_gen; }; -/* must be called with irqs disabled */ -static __always_inline void guest_enter_irqoff(void) +/* + * Start accounting time towards a guest. + * Must be called before entering guest context. + */ +static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so its safe to assume that it's the @@ -378,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void) instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); +} +/* + * Enter guest context and enter an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_enter_irqoff(void) +{ /* * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -394,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void) } } -static __always_inline void guest_exit_irqoff(void) +/* + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and + * guest_state_enter_irqoff(). 
+ */ +static __always_inline void guest_enter_irqoff(void) +{ + guest_timing_enter_irqoff(); + guest_context_enter_irqoff(); +} + +/** + * guest_state_enter_irqoff - Fixup state when entering a guest + * + * Entry to a guest will enable interrupts, but the kernel state is interrupts + * disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code before entering a guest. + * Must be called with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_enter_irqoff() before this. + * + * Note: this is analogous to exit_to_user_mode(). + */ +static __always_inline void guest_state_enter_irqoff(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + + guest_context_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +/* + * Exit guest context and exit an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_exit_irqoff(void) { context_tracking_guest_exit(); +} +/* + * Stop accounting time towards a guest. + * Must be called after exiting guest context. + */ +static __always_inline void guest_timing_exit_irqoff(void) +{ instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } +/* + * Deprecated. Architectures should move to guest_state_exit_irqoff() and + * guest_timing_exit_irqoff(). + */ +static __always_inline void guest_exit_irqoff(void) +{ + guest_context_exit_irqoff(); + guest_timing_exit_irqoff(); +} + static inline void guest_exit(void) { unsigned long flags; @@ -413,6 +492,33 @@ static inline void guest_exit(void) local_irq_restore(flags); } +/** + * guest_state_exit_irqoff - Establish state when returning from guest mode + * + * Entry from a guest disables interrupts, but guest mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific code after exiting a guest. + * Must be invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_exit_irqoff() after this. + * + * Note: this is analogous to enter_from_user_mode(). + */ +static __always_inline void guest_state_exit_irqoff(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + guest_context_exit_irqoff(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* -- cgit v1.2.3 From 2220af8ca61ae67de4ec3deec1c6395a2f65b9fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 1 Feb 2022 14:06:47 +0100 Subject: power: supply: core: Refactor power_supply_set_input_current_limit_from_supplier() Some (USB) charger ICs have variants with USB D+ and D- pins to do their own builtin charger-type detection, like e.g. the bq24190 and bq25890 and also variants which lack this functionality, e.g. 
the bq24192 and bq25892. In case the charger-type; and thus the input-current-limit detection is done outside the charger IC then we need some way to communicate this to the charger IC. In the past extcon was used for this, but if the external detection does e.g. full USB PD negotiation then the extcon cable-types do not convey enough information. For these setups it was decided to model the external charging "brick" and the parameters negotiated with it as a power_supply class-device itself; and power_supply_set_input_current_limit_from_supplier() was introduced to allow drivers to get the input-current-limit this way. But in some cases psy drivers may want to know other properties, e.g. the bq25892 can do "quick-charge" negotiation by pulsing its current draw, but this should only be done if the usb_type psy-property of its supplier is set to DCP (and device-properties indicate the board allows higher voltages). Instead of adding extra helper functions for each property which a psy-driver wants to query from its supplier, refactor power_supply_set_input_current_limit_from_supplier() into a more generic power_supply_get_property_from_supplier() function. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index e218041cc000..006111917d1a 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -597,8 +597,9 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table int table_len, int temp); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); -extern int power_supply_set_input_current_limit_from_supplier( - struct power_supply *psy); +int power_supply_get_property_from_supplier(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val); extern int power_supply_set_battery_charged(struct power_supply *psy); #ifdef CONFIG_POWER_SUPPLY -- cgit v1.2.3 From 79d35365a5858466ff7b37aaf1fcf11b683b9442 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 1 Feb 2022 14:06:56 +0100 Subject: power: supply: bq25890: Add support for registering the Vbus boost converter as a regulator The bq25890_charger code supports enabling/disabling the boost converter based on usb-phy notifications. But the usb-phy framework is not used on all boards/platforms. At support for registering the Vbus boost converter as a standard regulator when there is no usb-phy on the board. Also add support for providing regulator_init_data through platform_data for use on boards where device-tree is not used and the platform code must thus provide the regulator_init_data. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/power/bq25890_charger.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/linux/power/bq25890_charger.h (limited to 'include') diff --git a/include/linux/power/bq25890_charger.h b/include/linux/power/bq25890_charger.h new file mode 100644 index 000000000000..c706ddb77a08 --- /dev/null +++ b/include/linux/power/bq25890_charger.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Platform data for the TI bq25890 battery charger driver. 
+ */ + +#ifndef _BQ25890_CHARGER_H_ +#define _BQ25890_CHARGER_H_ + +struct regulator_init_data; + +struct bq25890_platform_data { + const struct regulator_init_data *regulator_init_data; +}; + +#endif -- cgit v1.2.3 From 3afcbe09470091ca8a8048ef7c96701839a70961 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 1 Feb 2022 14:07:00 +0100 Subject: mfd: intel_soc_pmic_chtwc: Add cht_wc_model data to struct intel_soc_pmic Tablet / laptop designs using an Intel Cherry Trail x86 main SoC with an Intel Whiskey Cove PMIC do not use a single standard setup for the charger, fuel-gauge and other chips surrounding the PMIC / charging+data USB port. Unlike what is normal on x86 this diversity in designs is not handled by the ACPI tables. On 2 of the 3 known designs there are no standard (PNP0C0A) ACPI battery devices and on the 3th design the ACPI battery device does not work under Linux due to it requiring non-standard and undocumented ACPI behavior. So to make things work under Linux we use native charger and fuel-gauge drivers on these devices, re-using the native drivers used on ARM boards with the same charger / fuel-gauge ICs. This requires various MFD-cell drivers for the CHT-WC PMIC cells to know which model they are exactly running on so that they can e.g. instantiate an I2C-client for the right model charger-IC (the charger is connected to an I2C-controller which is part of the PMIC). Rather then duplicating DMI-id matching to check which model we are running on in each MFD-cell driver, add a check for this to the shared drivers/mfd/intel_soc_pmic_chtwc.c code by using a DMI table for all 3 known models: 1. The GPD Win and GPD Pocket mini-laptops, these are really 2 models but the Pocket re-uses the GPD Win's design in a different housing: The WC PMIC is connected to a TI BQ24292i charger, paired with a Maxim MAX17047 fuelgauge + a FUSB302 USB Type-C Controller + a PI3USB30532 USB switch, for a fully functional Type-C port. 2. The Xiaomi Mi Pad 2: The WC PMIC is connected to a TI BQ25890 charger, paired with a TI BQ27520 fuelgauge, using the TI BQ25890 for BC1.2 charger type detection, for a USB-2 only Type-C port without PD. 3. The Lenovo Yoga Book YB1-X90 / Lenovo Yoga Book YB1-X91 series: The WC PMIC is connected to a TI BQ25892 charger, paired with a TI BQ27542 fuelgauge, using the WC PMIC for BC1.2 charger type detection and using the BQ25892's Mediatek Pump Express+ (1.0) support to enable charging with up to 12V through a micro-USB port. 
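Illustrative sketch (not part of this patch) of how a CHT-WC cell driver can key off the new field; the probe function and the per-board actions are made up for the example: static int example_cht_wc_cell_probe(struct platform_device *pdev) { struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent); switch (pmic->cht_wc_model) { case INTEL_CHT_WC_GPD_WIN_POCKET: /* external FUSB302 does the charger-type detection */ break; case INTEL_CHT_WC_XIAOMI_MIPAD2: case INTEL_CHT_WC_LENOVO_YOGABOOK1: /* instantiate the board's charger/fuel-gauge I2C clients here */ break; default: dev_warn(&pdev->dev, "unknown board, using conservative defaults\n"); break; } return 0; }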
Reviewed-by: Andy Shevchenko Acked-by: Lee Jones Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/mfd/intel_soc_pmic.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 6a88e34cb955..945bde1fe55c 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -13,6 +13,13 @@ #include +enum intel_cht_wc_models { + INTEL_CHT_WC_UNKNOWN, + INTEL_CHT_WC_GPD_WIN_POCKET, + INTEL_CHT_WC_XIAOMI_MIPAD2, + INTEL_CHT_WC_LENOVO_YOGABOOK1, +}; + /** * struct intel_soc_pmic - Intel SoC PMIC data * @irq: Master interrupt number of the parent PMIC device @@ -39,6 +46,7 @@ struct intel_soc_pmic { struct regmap_irq_chip_data *irq_chip_data_crit; struct device *dev; struct intel_scu_ipc_dev *scu; + enum intel_cht_wc_models cht_wc_model; }; int intel_soc_pmic_exec_mipi_pmic_seq_element(u16 i2c_address, u32 reg_address, -- cgit v1.2.3 From ab28e944197fa78e6af7c4a0ffd6bba9a5bbacf0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:11 -0800 Subject: topology/sysfs: Add PPIN in sysfs under cpu topology PPIN is the Protected Processor Identification Number. This is used to identify the socket as a Field Replaceable Unit (FRU). Existing code only displays this when reporting errors. But this makes it inconvenient for large clusters to use it for its intended purpose of inventory control. Add ppin to /sys/devices/system/cpu/cpu*/topology to make what is already available using RDMSR more easily accessible. Make the file read only for root in case there are still people concerned about making a unique system "serial number" available. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220131230111.2004669-6-tony.luck@intel.com --- include/linux/topology.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index a6e201758ae9..f19bc3626297 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_ppin +#define topology_ppin(cpu) ((void)(cpu), 0ull) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif -- cgit v1.2.3 From bee9f65523218e3baeeecde9295c8fbe9bc08e0a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Jan 2022 16:02:50 +0000 Subject: netfs, cachefiles: Add a method to query presence of data in the cache Add a netfs_cache_ops method by which a network filesystem can ask the cache about what data it has available and where so that it can make a multipage read more efficient. Signed-off-by: David Howells cc: linux-cachefs@redhat.com Acked-by: Jeff Layton Reviewed-by: Rohith Surabattula Signed-off-by: Steve French --- include/linux/netfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b46c39d98bbd..614f22213e21 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -244,6 +244,13 @@ struct netfs_cache_ops { int (*prepare_write)(struct netfs_cache_resources *cres, loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + + /* Query the occupancy of the cache in a region, returning where the + * next chunk of data starts and how long it is. 
+ */ + int (*query_occupancy)(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len); }; struct readahead_control; -- cgit v1.2.3 From 90b2433edb6d995bd23d6adde753095b4ab26104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 31 Jan 2022 15:22:42 -0300 Subject: seq_file: fix NULL pointer arithmetic warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement conditional logic in order to replace NULL pointer arithmetic. The use of NULL pointer arithmetic was pointed out by clang with the following warning: fs/kernfs/file.c:128:15: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] return NULL + !*ppos; ~~~~ ^ fs/seq_file.c:559:14: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] return NULL + (*pos == 0); Signed-off-by: Maíra Canal Signed-off-by: Al Viro --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 88cc16444b43..60820ab511d2 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -162,6 +162,7 @@ int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); +void *single_start(struct seq_file *, loff_t *); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); int single_release(struct inode *, struct file *); -- cgit v1.2.3 From e3dc1399506f894110667ee5c66a6a70f06f3348 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:23 +0000 Subject: spi: Make spi_alloc_device and spi_add_device public again This functions were previously made private since they were not used. However, these functions will be needed again. Partial revert of commit da21fde0fdb3 ("spi: Make several public functions private to spi.c") Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b804..0346a3ff27fd 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1452,7 +1452,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). + * + * You can also use spi_alloc_device() and spi_add_device() to use a two + * stage registration sequence for each spi_device. This gives the caller + * some more control over the spi_device structure before it is registered, + * but requires that caller to initialize fields that would otherwise + * be defined using the board info. 
*/ +extern struct spi_device * +spi_alloc_device(struct spi_controller *ctlr); + +extern int +spi_add_device(struct spi_device *spi); + extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); -- cgit v1.2.3 From 000bee0ed70af79e610444096fb453430220960f Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:24 +0000 Subject: spi: Create helper API to lookup ACPI info for spi device This can then be used to find a spi resource inside an ACPI node, and allocate a spi device. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0346a3ff27fd..d159cef12f1a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -16,6 +16,7 @@ #include #include +#include struct dma_chan; struct software_node; @@ -759,6 +760,11 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); +#if IS_ENABLED(CONFIG_ACPI) +extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev); +#endif + /* * SPI resource management while processing a SPI message */ -- cgit v1.2.3 From 87e59b36e5e26122efd55d77adb9fac827987db0 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:25 +0000 Subject: spi: Support selection of the index of the ACPI Spi Resource before alloc If a node contains more than one SPI resource it may be necessary to use an index to select which one you want to allocate a spi device for. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d159cef12f1a..e5bbb9cbd3d7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -762,7 +762,8 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, - struct acpi_device *adev); + struct acpi_device *adev, + int index); #endif /* -- cgit v1.2.3 From e612af7acef2459f1afd885f4107748995a05963 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:26 +0000 Subject: spi: Add API to count spi acpi resources Some ACPI nodes may have more than one Spi Resource. To be able to handle these case, its necessary to have a way of counting these resources. 
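Illustrative sketch (not part of this patch) of a consumer driving all three new helpers together; error handling is abbreviated and the function name is made up: static int example_claim_second_spi_resource(struct spi_controller *ctlr, struct acpi_device *adev) { struct spi_device *spi; int ret; if (acpi_spi_count_resources(adev) < 2) return -ENODEV; /* index 1 selects the second SpiSerialBus resource of the node */ spi = acpi_spi_device_alloc(ctlr, adev, 1); if (IS_ERR(spi)) return PTR_ERR(spi); ret = spi_add_device(spi); if (ret) spi_dev_put(spi); return ret; }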
Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-5-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e5bbb9cbd3d7..394b4241d989 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -764,6 +764,7 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); +int acpi_spi_count_resources(struct acpi_device *adev); #endif /* -- cgit v1.2.3 From 02b2a91c6f0d57df687e666b475849a54f295a12 Mon Sep 17 00:00:00 2001 From: David Girault Date: Tue, 1 Feb 2022 19:09:56 +0100 Subject: net: ieee802154: Provide a kdoc to the address structure Give this structure a header to better explain its content. Signed-off-by: David Girault [miquel.raynal@bootlin.com: Isolate this change from a bigger commit and reword the comment] Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220201180956.93581-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ed07844eb24..833672d6fbe4 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -227,6 +227,16 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) write_pnet(&wpan_phy->_net, net); } +/** + * struct ieee802154_addr - IEEE802.15.4 device address + * @mode: Address mode from frame header. Can be one of: + * - @IEEE802154_ADDR_NONE + * - @IEEE802154_ADDR_SHORT + * - @IEEE802154_ADDR_LONG + * @pan_id: The PAN ID this address belongs to + * @short_addr: address if @mode is @IEEE802154_ADDR_SHORT + * @extended_addr: address if @mode is @IEEE802154_ADDR_LONG + */ struct ieee802154_addr { u8 mode; __le16 pan_id; -- cgit v1.2.3 From 6d5c900eb64107001e91e1f46bddc254dded8a59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 09:22:41 -0800 Subject: net/mlx5e: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use struct_group() in struct vlan_ethhdr around members h_dest and h_source, so they can be referenced together. This will allow memcpy() and sizeof() to more easily reason about sizes, improve readability, and avoid future warnings about writing beyond the end of h_dest. "pahole" shows no size nor member offset changes to struct vlan_ethhdr. "objdump -d" shows no object code changes. 
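Illustrative sketch (not part of this patch) of the access pattern the grouping enables: both MAC addresses are copied with a single bounded memcpy() instead of writing past h_dest: static void example_copy_vlan_addrs(struct vlan_ethhdr *vhdr, const struct sk_buff *skb) { /* sizeof(vhdr->addrs) == 2 * ETH_ALEN == 12 bytes */ memcpy(&vhdr->addrs, skb->data, sizeof(vhdr->addrs)); }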
Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB") Signed-off-by: Kees Cook Signed-off-by: Saeed Mahameed --- include/linux/if_vlan.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8420fe504927..2be4dd7e90a9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -46,8 +46,10 @@ struct vlan_hdr { * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + struct_group(addrs, + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; -- cgit v1.2.3 From 4decd2e54b61686787f36b727d2772e067a46ea5 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 26 Jan 2022 21:10:03 +0000 Subject: dt-bindings: clock: Add R9A07G054 CPG Clock and Reset Definitions Define RZ/V2L (R9A07G054) Clock Pulse Generator Core Clock and module clock outputs, as listed in Table 7.1.4.2 ("Clock List r1.0") and also add Reset definitions referring to registers CPG_RST_* in Section 7.2.3 ("Register configuration") of the RZ/V2L Hardware User's Manual (Rev. 1.00, Nov. 2021). Signed-off-by: Biju Das Signed-off-by: Lad Prabhakar Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220126211003.6675-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r9a07g054-cpg.h | 229 ++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 include/dt-bindings/clock/r9a07g054-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/r9a07g054-cpg.h b/include/dt-bindings/clock/r9a07g054-cpg.h new file mode 100644 index 000000000000..43f4dbda872c --- /dev/null +++ b/include/dt-bindings/clock/r9a07g054-cpg.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2022 Renesas Electronics Corp. 
+ */ +#ifndef __DT_BINDINGS_CLOCK_R9A07G054_CPG_H__ +#define __DT_BINDINGS_CLOCK_R9A07G054_CPG_H__ + +#include + +/* R9A07G054 CPG Core Clocks */ +#define R9A07G054_CLK_I 0 +#define R9A07G054_CLK_I2 1 +#define R9A07G054_CLK_G 2 +#define R9A07G054_CLK_S0 3 +#define R9A07G054_CLK_S1 4 +#define R9A07G054_CLK_SPI0 5 +#define R9A07G054_CLK_SPI1 6 +#define R9A07G054_CLK_SD0 7 +#define R9A07G054_CLK_SD1 8 +#define R9A07G054_CLK_M0 9 +#define R9A07G054_CLK_M1 10 +#define R9A07G054_CLK_M2 11 +#define R9A07G054_CLK_M3 12 +#define R9A07G054_CLK_M4 13 +#define R9A07G054_CLK_HP 14 +#define R9A07G054_CLK_TSU 15 +#define R9A07G054_CLK_ZT 16 +#define R9A07G054_CLK_P0 17 +#define R9A07G054_CLK_P1 18 +#define R9A07G054_CLK_P2 19 +#define R9A07G054_CLK_AT 20 +#define R9A07G054_OSCCLK 21 +#define R9A07G054_CLK_P0_DIV2 22 +#define R9A07G054_CLK_DRP_M 23 +#define R9A07G054_CLK_DRP_D 24 +#define R9A07G054_CLK_DRP_A 25 + +/* R9A07G054 Module Clocks */ +#define R9A07G054_CA55_SCLK 0 +#define R9A07G054_CA55_PCLK 1 +#define R9A07G054_CA55_ATCLK 2 +#define R9A07G054_CA55_GICCLK 3 +#define R9A07G054_CA55_PERICLK 4 +#define R9A07G054_CA55_ACLK 5 +#define R9A07G054_CA55_TSCLK 6 +#define R9A07G054_GIC600_GICCLK 7 +#define R9A07G054_IA55_CLK 8 +#define R9A07G054_IA55_PCLK 9 +#define R9A07G054_MHU_PCLK 10 +#define R9A07G054_SYC_CNT_CLK 11 +#define R9A07G054_DMAC_ACLK 12 +#define R9A07G054_DMAC_PCLK 13 +#define R9A07G054_OSTM0_PCLK 14 +#define R9A07G054_OSTM1_PCLK 15 +#define R9A07G054_OSTM2_PCLK 16 +#define R9A07G054_MTU_X_MCK_MTU3 17 +#define R9A07G054_POE3_CLKM_POE 18 +#define R9A07G054_GPT_PCLK 19 +#define R9A07G054_POEG_A_CLKP 20 +#define R9A07G054_POEG_B_CLKP 21 +#define R9A07G054_POEG_C_CLKP 22 +#define R9A07G054_POEG_D_CLKP 23 +#define R9A07G054_WDT0_PCLK 24 +#define R9A07G054_WDT0_CLK 25 +#define R9A07G054_WDT1_PCLK 26 +#define R9A07G054_WDT1_CLK 27 +#define R9A07G054_WDT2_PCLK 28 +#define R9A07G054_WDT2_CLK 29 +#define R9A07G054_SPI_CLK2 30 +#define R9A07G054_SPI_CLK 31 +#define R9A07G054_SDHI0_IMCLK 32 +#define R9A07G054_SDHI0_IMCLK2 33 +#define R9A07G054_SDHI0_CLK_HS 34 +#define R9A07G054_SDHI0_ACLK 35 +#define R9A07G054_SDHI1_IMCLK 36 +#define R9A07G054_SDHI1_IMCLK2 37 +#define R9A07G054_SDHI1_CLK_HS 38 +#define R9A07G054_SDHI1_ACLK 39 +#define R9A07G054_GPU_CLK 40 +#define R9A07G054_GPU_AXI_CLK 41 +#define R9A07G054_GPU_ACE_CLK 42 +#define R9A07G054_ISU_ACLK 43 +#define R9A07G054_ISU_PCLK 44 +#define R9A07G054_H264_CLK_A 45 +#define R9A07G054_H264_CLK_P 46 +#define R9A07G054_CRU_SYSCLK 47 +#define R9A07G054_CRU_VCLK 48 +#define R9A07G054_CRU_PCLK 49 +#define R9A07G054_CRU_ACLK 50 +#define R9A07G054_MIPI_DSI_PLLCLK 51 +#define R9A07G054_MIPI_DSI_SYSCLK 52 +#define R9A07G054_MIPI_DSI_ACLK 53 +#define R9A07G054_MIPI_DSI_PCLK 54 +#define R9A07G054_MIPI_DSI_VCLK 55 +#define R9A07G054_MIPI_DSI_LPCLK 56 +#define R9A07G054_LCDC_CLK_A 57 +#define R9A07G054_LCDC_CLK_P 58 +#define R9A07G054_LCDC_CLK_D 59 +#define R9A07G054_SSI0_PCLK2 60 +#define R9A07G054_SSI0_PCLK_SFR 61 +#define R9A07G054_SSI1_PCLK2 62 +#define R9A07G054_SSI1_PCLK_SFR 63 +#define R9A07G054_SSI2_PCLK2 64 +#define R9A07G054_SSI2_PCLK_SFR 65 +#define R9A07G054_SSI3_PCLK2 66 +#define R9A07G054_SSI3_PCLK_SFR 67 +#define R9A07G054_SRC_CLKP 68 +#define R9A07G054_USB_U2H0_HCLK 69 +#define R9A07G054_USB_U2H1_HCLK 70 +#define R9A07G054_USB_U2P_EXR_CPUCLK 71 +#define R9A07G054_USB_PCLK 72 +#define R9A07G054_ETH0_CLK_AXI 73 +#define R9A07G054_ETH0_CLK_CHI 74 +#define R9A07G054_ETH1_CLK_AXI 75 +#define R9A07G054_ETH1_CLK_CHI 76 +#define R9A07G054_I2C0_PCLK 77 
+#define R9A07G054_I2C1_PCLK 78 +#define R9A07G054_I2C2_PCLK 79 +#define R9A07G054_I2C3_PCLK 80 +#define R9A07G054_SCIF0_CLK_PCK 81 +#define R9A07G054_SCIF1_CLK_PCK 82 +#define R9A07G054_SCIF2_CLK_PCK 83 +#define R9A07G054_SCIF3_CLK_PCK 84 +#define R9A07G054_SCIF4_CLK_PCK 85 +#define R9A07G054_SCI0_CLKP 86 +#define R9A07G054_SCI1_CLKP 87 +#define R9A07G054_IRDA_CLKP 88 +#define R9A07G054_RSPI0_CLKB 89 +#define R9A07G054_RSPI1_CLKB 90 +#define R9A07G054_RSPI2_CLKB 91 +#define R9A07G054_CANFD_PCLK 92 +#define R9A07G054_GPIO_HCLK 93 +#define R9A07G054_ADC_ADCLK 94 +#define R9A07G054_ADC_PCLK 95 +#define R9A07G054_TSU_PCLK 96 +#define R9A07G054_STPAI_INITCLK 97 +#define R9A07G054_STPAI_ACLK 98 +#define R9A07G054_STPAI_MCLK 99 +#define R9A07G054_STPAI_DCLKIN 100 +#define R9A07G054_STPAI_ACLK_DRP 101 + +/* R9A07G054 Resets */ +#define R9A07G054_CA55_RST_1_0 0 +#define R9A07G054_CA55_RST_1_1 1 +#define R9A07G054_CA55_RST_3_0 2 +#define R9A07G054_CA55_RST_3_1 3 +#define R9A07G054_CA55_RST_4 4 +#define R9A07G054_CA55_RST_5 5 +#define R9A07G054_CA55_RST_6 6 +#define R9A07G054_CA55_RST_7 7 +#define R9A07G054_CA55_RST_8 8 +#define R9A07G054_CA55_RST_9 9 +#define R9A07G054_CA55_RST_10 10 +#define R9A07G054_CA55_RST_11 11 +#define R9A07G054_CA55_RST_12 12 +#define R9A07G054_GIC600_GICRESET_N 13 +#define R9A07G054_GIC600_DBG_GICRESET_N 14 +#define R9A07G054_IA55_RESETN 15 +#define R9A07G054_MHU_RESETN 16 +#define R9A07G054_DMAC_ARESETN 17 +#define R9A07G054_DMAC_RST_ASYNC 18 +#define R9A07G054_SYC_RESETN 19 +#define R9A07G054_OSTM0_PRESETZ 20 +#define R9A07G054_OSTM1_PRESETZ 21 +#define R9A07G054_OSTM2_PRESETZ 22 +#define R9A07G054_MTU_X_PRESET_MTU3 23 +#define R9A07G054_POE3_RST_M_REG 24 +#define R9A07G054_GPT_RST_C 25 +#define R9A07G054_POEG_A_RST 26 +#define R9A07G054_POEG_B_RST 27 +#define R9A07G054_POEG_C_RST 28 +#define R9A07G054_POEG_D_RST 29 +#define R9A07G054_WDT0_PRESETN 30 +#define R9A07G054_WDT1_PRESETN 31 +#define R9A07G054_WDT2_PRESETN 32 +#define R9A07G054_SPI_RST 33 +#define R9A07G054_SDHI0_IXRST 34 +#define R9A07G054_SDHI1_IXRST 35 +#define R9A07G054_GPU_RESETN 36 +#define R9A07G054_GPU_AXI_RESETN 37 +#define R9A07G054_GPU_ACE_RESETN 38 +#define R9A07G054_ISU_ARESETN 39 +#define R9A07G054_ISU_PRESETN 40 +#define R9A07G054_H264_X_RESET_VCP 41 +#define R9A07G054_H264_CP_PRESET_P 42 +#define R9A07G054_CRU_CMN_RSTB 43 +#define R9A07G054_CRU_PRESETN 44 +#define R9A07G054_CRU_ARESETN 45 +#define R9A07G054_MIPI_DSI_CMN_RSTB 46 +#define R9A07G054_MIPI_DSI_ARESET_N 47 +#define R9A07G054_MIPI_DSI_PRESET_N 48 +#define R9A07G054_LCDC_RESET_N 49 +#define R9A07G054_SSI0_RST_M2_REG 50 +#define R9A07G054_SSI1_RST_M2_REG 51 +#define R9A07G054_SSI2_RST_M2_REG 52 +#define R9A07G054_SSI3_RST_M2_REG 53 +#define R9A07G054_SRC_RST 54 +#define R9A07G054_USB_U2H0_HRESETN 55 +#define R9A07G054_USB_U2H1_HRESETN 56 +#define R9A07G054_USB_U2P_EXL_SYSRST 57 +#define R9A07G054_USB_PRESETN 58 +#define R9A07G054_ETH0_RST_HW_N 59 +#define R9A07G054_ETH1_RST_HW_N 60 +#define R9A07G054_I2C0_MRST 61 +#define R9A07G054_I2C1_MRST 62 +#define R9A07G054_I2C2_MRST 63 +#define R9A07G054_I2C3_MRST 64 +#define R9A07G054_SCIF0_RST_SYSTEM_N 65 +#define R9A07G054_SCIF1_RST_SYSTEM_N 66 +#define R9A07G054_SCIF2_RST_SYSTEM_N 67 +#define R9A07G054_SCIF3_RST_SYSTEM_N 68 +#define R9A07G054_SCIF4_RST_SYSTEM_N 69 +#define R9A07G054_SCI0_RST 70 +#define R9A07G054_SCI1_RST 71 +#define R9A07G054_IRDA_RST 72 +#define R9A07G054_RSPI0_RST 73 +#define R9A07G054_RSPI1_RST 74 +#define R9A07G054_RSPI2_RST 75 +#define R9A07G054_CANFD_RSTP_N 76 +#define 
R9A07G054_CANFD_RSTC_N 77 +#define R9A07G054_GPIO_RSTN 78 +#define R9A07G054_GPIO_PORT_RESETN 79 +#define R9A07G054_GPIO_SPARE_RESETN 80 +#define R9A07G054_ADC_PRESETN 81 +#define R9A07G054_ADC_ADRST_N 82 +#define R9A07G054_TSU_PRESETN 83 +#define R9A07G054_STPAI_ARESETN 84 + +#endif /* __DT_BINDINGS_CLOCK_R9A07G054_CPG_H__ */ -- cgit v1.2.3 From bfdf4e6208051ed7165b2e92035b4bf11f43eb63 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 27 Jan 2022 10:27:20 -0500 Subject: rseq: Remove broken uapi field layout on 32-bit little endian The rseq rseq_cs.ptr.{ptr32,padding} uapi endianness handling is entirely wrong on 32-bit little endian: a preprocessor logic mistake wrongly uses the big endian field layout on 32-bit little endian architectures. Fortunately, those ptr32 accessors were never used within the kernel, and only meant as a convenience for user-space. Remove those and replace the whole rseq_cs union by a __u64 type, as this is the only thing really needed to express the ABI. Document how 32-bit architectures are meant to interact with this field. Fixes: ec9c82e03a74 ("rseq: uapi: Declare rseq_cs field as union, update includes") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220127152720.25898-1-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 9a402fdb60e9..77ee207623a9 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -105,23 +105,11 @@ struct rseq { * Read and set by the kernel. Set by user-space with single-copy * atomicity semantics. This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. + * + * 32-bit architectures should update the low order bits of the + * rseq_cs field, leaving the high order bits initialized to 0. */ - union { - __u64 ptr64; -#ifdef __LP64__ - __u64 ptr; -#else - struct { -#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) - __u32 padding; /* Initialized to zero. */ - __u32 ptr32; -#else /* LITTLE */ - __u32 ptr32; - __u32 padding; /* Initialized to zero. */ -#endif /* ENDIAN */ - } ptr; -#endif - } rseq_cs; + __u64 rseq_cs; /* * Restartable sequences flags field. -- cgit v1.2.3 From c8eaf6ac76f40f6c59fc7d056e2e08c4a57ea9c7 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Fri, 28 Jan 2022 17:50:25 +0800 Subject: sched: move autogroup sysctls into its own file move autogroup sysctls to autogroup.c and use the new register_sysctl_init() to register the sysctl interface. 
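Roughly, the moved registration looks like the sketch below (illustrative and trimmed; the handler and limits are assumed from the previous kernel/sysctl.c entry): static struct ctl_table sched_autogroup_sysctls[] = { { .procname = "sched_autogroup_enabled", .data = &sysctl_sched_autogroup_enabled, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, {} }; static void __init sched_autogroup_sysctl_init(void) { register_sysctl_init("kernel", sched_autogroup_sysctls); }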
Signed-off-by: Zhen Ni Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220128095025.8745-1-nizhen@uniontech.com --- include/linux/sched/sysctl.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c19dd5a2c05c..3f2b70f8d32c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -45,10 +45,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; -#endif - extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; -- cgit v1.2.3 From ddecd22878601a606d160680fa85802b75d92eb6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 11:34:07 +0100 Subject: perf: uapi: Document perf_event_attr::sig_data truncation on 32 bit architectures Due to the alignment requirements of siginfo_t, as described in 3ddb3fd8cdb0 ("signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures"), siginfo_t::si_perf_data is limited to an unsigned long. However, perf_event_attr::sig_data is an u64, to avoid having to deal with compat conversions. Due to being an u64, it may not immediately be clear to users that sig_data is truncated on 32 bit architectures. Add a comment to explicitly point this out, and hopefully help some users save time by not having to deduce themselves what's happening. Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20220131103407.1971678-3-elver@google.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..82858b697c05 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; -- cgit v1.2.3 From 1148836fd3226c20de841084aba24184d4fbbe77 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 2 Feb 2022 14:55:29 +0100 Subject: Revert "fbdev: Garbage collect fbdev scrolling acceleration, part 1 (from TODO list)" This reverts commit b3ec8cdf457e5e63d396fe1346cc788cf7c1b578. Revert the second (of 2) commits which disabled scrolling acceleration in fbcon/fbdev. It introduced a regression for fbdev-supported graphic cards because of the performance penalty by doing screen scrolling by software instead of using the existing graphic card 2D hardware acceleration. Console scrolling acceleration was disabled by dropping code which checked at runtime the driver hardware capabilities for the BINFO_HWACCEL_COPYAREA or FBINFO_HWACCEL_FILLRECT flags and if set, it enabled scrollmode SCROLL_MOVE which uses hardware acceleration to move screen contents. After dropping those checks scrollmode was hard-wired to SCROLL_REDRAW instead, which forces all graphic cards to redraw every character at the new screen position when scrolling. 
This change effectively disabled all hardware-based scrolling acceleration for ALL drivers, because now all kind of 2D hardware acceleration (bitblt, fillrect) in the drivers isn't used any longer. The original commit message mentions that only 3 DRM drivers (nouveau, omapdrm and gma500) used hardware acceleration in the past and thus code for checking and using scrolling acceleration is obsolete. This statement is NOT TRUE, because beside the DRM drivers there are around 35 other fbdev drivers which depend on fbdev/fbcon and still provide hardware acceleration for fbdev/fbcon. The original commit message also states that syzbot found lots of bugs in fbcon and thus it's "often the solution to just delete code and remove features". This is true, and the bugs - which actually affected all users of fbcon, including DRM - were fixed, or code was dropped like e.g. the support for software scrollback in vgacon (commit 973c096f6a85). So to further analyze which bugs were found by syzbot, I've looked through all patches in drivers/video which were tagged with syzbot or syzkaller back to year 2005. The vast majority fixed the reported issues on a higher level, e.g. when screen is to be resized, or when font size is to be changed. The few ones which touched driver code fixed a real driver bug, e.g. by adding a check. But NONE of those patches touched code of either the SCROLL_MOVE or the SCROLL_REDRAW case. That means, there was no real reason why SCROLL_MOVE had to be ripped-out and just SCROLL_REDRAW had to be used instead. The only reason I can imagine so far was that SCROLL_MOVE wasn't used by DRM and as such it was assumed that it could go away. That argument completely missed the fact that SCROLL_MOVE is still heavily used by fbdev (non-DRM) drivers. Some people mention that using memcpy() instead of the hardware acceleration is pretty much the same speed. But that's not true, at least not for older graphic cards and machines where we see speed decreases by factor 10 and more and thus this change leads to console responsiveness way worse than before. That's why the original commit is to be reverted. By reverting we reintroduce hardware-based scrolling acceleration and fix the performance regression for fbdev drivers. There isn't any impact on DRM when reverting those patches. Signed-off-by: Helge Deller Acked-by: Geert Uytterhoeven Acked-by: Sven Schnelle Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Helge Deller Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220202135531.92183-2-deller@gmx.de --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 3da95842b207..02f362c661c8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -262,7 +262,7 @@ struct fb_ops { /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); - /* Copy data from area to another. Obsolete. 
*/ + /* Copy data from area to another */ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); /* Draws a image to the display */ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); -- cgit v1.2.3 From 295ab96f478d0fa56393e85406f19a867e26ce22 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Feb 2022 01:03:20 +0100 Subject: net: dsa: provide switch operations for tracking the master state Certain drivers may need to send management traffic to the switch for things like register access, FDB dump, etc, to accelerate what their slow bus (SPI, I2C, MDIO) can already do. Ethernet is faster (especially in bulk transactions) but is also more unreliable, since the user may decide to bring the DSA master down (or not bring it up), therefore severing the link between the host and the attached switch. Drivers needing Ethernet-based register access already should have fallback logic to the slow bus if the Ethernet method fails, but that fallback may be based on a timeout, and the I/O to the switch may slow down to a halt if the master is down, because every Ethernet packet will have to time out. The driver also doesn't have the option to turn off Ethernet-based I/O momentarily, because it wouldn't know when to turn it back on. Which is where this change comes in. By tracking NETDEV_CHANGE, NETDEV_UP and NETDEV_GOING_DOWN events on the DSA master, we should know the exact interval of time during which this interface is reliably available for traffic. Provide this information to switches so they can use it as they wish. An helper is added dsa_port_master_is_operational() to check if a master port is operational. Signed-off-by: Vladimir Oltean Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 57b3e4e7413b..43c4153ef53a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -278,6 +278,10 @@ struct dsa_port { u8 devlink_port_setup:1; + /* Master state bits, valid only on CPU ports */ + u8 master_admin_up:1; + u8 master_oper_up:1; + u8 setup:1; struct device_node *dn; @@ -478,6 +482,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } +static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +{ + return dsa_port_is_cpu(dp) && dp->master_admin_up && + dp->master_oper_up; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; @@ -1036,6 +1046,13 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + + /* + * DSA master tracking operations + */ + void (*master_state_change)(struct dsa_switch *ds, + const struct net_device *master, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ -- cgit v1.2.3 From 3ec762fb13c7e7273800b94c80db1c2cc37590d1 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Wed, 2 Feb 2022 01:03:23 +0100 Subject: net: dsa: tag_qca: move define to include linux/dsa Move tag_qca define to include dir linux/dsa as the qca8k require access to the tagger define to support in-band mdio read/write using ethernet packet. Signed-off-by: Ansuel Smith Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/dsa/tag_qca.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/dsa/tag_qca.h (limited to 'include') diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h new file mode 100644 index 000000000000..c02d2d39ff4a --- /dev/null +++ b/include/linux/dsa/tag_qca.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TAG_QCA_H +#define __TAG_QCA_H + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION GENMASK(15, 14) +#define QCA_HDR_RECV_PRIORITY GENMASK(13, 11) +#define QCA_HDR_RECV_TYPE GENMASK(10, 6) +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT GENMASK(2, 0) + +#define QCA_HDR_XMIT_VERSION GENMASK(15, 14) +#define QCA_HDR_XMIT_PRIORITY GENMASK(13, 11) +#define QCA_HDR_XMIT_CONTROL GENMASK(10, 8) +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT GENMASK(6, 0) + +#endif /* __TAG_QCA_H */ -- cgit v1.2.3 From c2ee8181fddb293d296477f60b3eb4fa3ce4e1a6 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Wed, 2 Feb 2022 01:03:25 +0100 Subject: net: dsa: tag_qca: add define for handling mgmt Ethernet packet Add all the required define to prepare support for mgmt read/write in Ethernet packet. Any packet of this type has to be dropped as the only use of these special packet is receive ack for an mgmt write request or receive data for an mgmt read request. A struct is used that emulates the Ethernet header but is used for a different purpose. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/tag_qca.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index c02d2d39ff4a..f366422ab7a0 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -12,10 +12,54 @@ #define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) #define QCA_HDR_RECV_SOURCE_PORT GENMASK(2, 0) +/* Packet type for recv */ +#define QCA_HDR_RECV_TYPE_NORMAL 0x0 +#define QCA_HDR_RECV_TYPE_MIB 0x1 +#define QCA_HDR_RECV_TYPE_RW_REG_ACK 0x2 + #define QCA_HDR_XMIT_VERSION GENMASK(15, 14) #define QCA_HDR_XMIT_PRIORITY GENMASK(13, 11) #define QCA_HDR_XMIT_CONTROL GENMASK(10, 8) #define QCA_HDR_XMIT_FROM_CPU BIT(7) #define QCA_HDR_XMIT_DP_BIT GENMASK(6, 0) +/* Packet type for xmit */ +#define QCA_HDR_XMIT_TYPE_NORMAL 0x0 +#define QCA_HDR_XMIT_TYPE_RW_REG 0x1 + +/* Check code for a valid mgmt packet. Switch will ignore the packet + * with this wrong. 
+ */ +#define QCA_HDR_MGMT_CHECK_CODE_VAL 0x5 + +/* Specific define for in-band MDIO read/write with Ethernet packet */ +#define QCA_HDR_MGMT_SEQ_LEN 4 /* 4 byte for the seq */ +#define QCA_HDR_MGMT_COMMAND_LEN 4 /* 4 byte for the command */ +#define QCA_HDR_MGMT_DATA1_LEN 4 /* First 4 byte for the mdio data */ +#define QCA_HDR_MGMT_HEADER_LEN (QCA_HDR_MGMT_SEQ_LEN + \ + QCA_HDR_MGMT_COMMAND_LEN + \ + QCA_HDR_MGMT_DATA1_LEN) + +#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ + +#define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ + QCA_HDR_LEN + \ + QCA_HDR_MGMT_DATA2_LEN + \ + QCA_HDR_MGMT_PADDING_LEN) + +#define QCA_HDR_MGMT_SEQ_NUM GENMASK(31, 0) /* 63, 32 */ +#define QCA_HDR_MGMT_CHECK_CODE GENMASK(31, 29) /* 31, 29 */ +#define QCA_HDR_MGMT_CMD BIT(28) /* 28 */ +#define QCA_HDR_MGMT_LENGTH GENMASK(23, 20) /* 23, 20 */ +#define QCA_HDR_MGMT_ADDR GENMASK(18, 0) /* 18, 0 */ + +/* Special struct emulating a Ethernet header */ +struct qca_mgmt_ethhdr { + u32 command; /* command bit 31:0 */ + u32 seq; /* seq 63:32 */ + u32 mdio_data; /* first 4byte mdio */ + __be16 hdr; /* qca hdr */ +} __packed; + #endif /* __TAG_QCA_H */ -- cgit v1.2.3 From 18be654a4345f7d937b4bfbad74bea8093e3a93c Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Wed, 2 Feb 2022 01:03:26 +0100 Subject: net: dsa: tag_qca: add define for handling MIB packet Add struct to correctly parse a mib Ethernet packet. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/tag_qca.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index f366422ab7a0..1fff57f2937b 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -62,4 +62,14 @@ struct qca_mgmt_ethhdr { __be16 hdr; /* qca hdr */ } __packed; +enum mdio_cmd { + MDIO_WRITE = 0x0, + MDIO_READ +}; + +struct mib_ethhdr { + u32 data[3]; /* first 3 mib counter */ + __be16 hdr; /* qca hdr */ +} __packed; + #endif /* __TAG_QCA_H */ -- cgit v1.2.3 From 31eb6b4386ad91930417e3f5c8157a4b5e31cbd5 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Wed, 2 Feb 2022 01:03:27 +0100 Subject: net: dsa: tag_qca: add support for handling mgmt and MIB Ethernet packet Add connect/disconnect helper to assign private struct to the DSA switch. Add support for Ethernet mgmt and MIB if the DSA driver provide an handler to correctly parse and elaborate the data. Signed-off-by: Ansuel Smith Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/dsa/tag_qca.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 1fff57f2937b..4359fb0221cf 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -72,4 +72,11 @@ struct mib_ethhdr { __be16 hdr; /* qca hdr */ } __packed; +struct qca_tagger_data { + void (*rw_reg_ack_handler)(struct dsa_switch *ds, + struct sk_buff *skb); + void (*mib_autocast_handler)(struct dsa_switch *ds, + struct sk_buff *skb); +}; + #endif /* __TAG_QCA_H */ -- cgit v1.2.3 From 5903123f662ed18483f05cac3f9e800a074c29ff Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Fri, 28 Jan 2022 22:26:21 +0300 Subject: tcp: Use BPF timeout setting for SYN ACK RTO When setting RTO through BPF program, some SYN ACK packets were unaffected and continued to use TCP_TIMEOUT_INIT constant. This patch adds timeout option to struct request_sock. Option is initialized with TCP_TIMEOUT_INIT and is reassigned through BPF using tcp_timeout_init call. SYN ACK retransmits now use newly added timeout option. Signed-off-by: Akhmat Karakotov Acked-by: Martin KaFai Lau v2: - Add timeout option to struct request_sock. Do not call tcp_timeout_init on every syn ack retransmit. v3: - Use unsigned long for min. Bound tcp_timeout_init to TCP_RTO_MAX. v4: - Refactor duplicate code by adding reqsk_timeout function. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 8 ++++++++ include/net/request_sock.h | 2 ++ include/net/tcp.h | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4ad47d9f9d27..3908296d103f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -285,6 +285,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); +static inline unsigned long +reqsk_timeout(struct request_sock *req, unsigned long max_timeout) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, max_timeout); +} + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 29e41ff3ec93..144c39db9898 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -70,6 +70,7 @@ struct request_sock { struct saved_syn *saved_syn; u32 secid; u32 peer_secid; + u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) @@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; + req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; diff --git a/include/net/tcp.h b/include/net/tcp.h index b9fc978fb2ca..eff2487d972d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2358,7 +2358,7 @@ static inline u32 tcp_timeout_init(struct sock *sk) if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; - return timeout; + return min_t(int, timeout, TCP_RTO_MAX); } static inline u32 tcp_rwnd_init_bpf(struct sock *sk) -- cgit v1.2.3 From 926597ffce0e3e2f785475df18e1636194209910 Mon Sep 17 00:00:00 2001 From: Christoph 
Hellwig Date: Mon, 24 Jan 2022 10:39:11 +0100 Subject: block: move disk_{block,unblock,flush}_events to blk.h No need to have these declarations in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6906a45bc761..504f9a6674ac 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -185,9 +185,6 @@ static inline int bdev_read_only(struct block_device *bdev) return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); } -extern void disk_block_events(struct gendisk *disk); -extern void disk_unblock_events(struct gendisk *disk); -extern void disk_flush_events(struct gendisk *disk, unsigned int mask); bool set_capacity_and_notify(struct gendisk *disk, sector_t size); bool disk_force_media_change(struct gendisk *disk, unsigned int events); -- cgit v1.2.3 From e7243285c0fc87054990fcde630583586ff8ed5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:12 +0100 Subject: block: move blk_drop_partitions to blk.h No need to have this declaration in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 504f9a6674ac..aa4bd985dbe5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,7 +219,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) } int bdev_disk_changed(struct gendisk *disk, bool invalidate); -void blk_drop_partitions(struct gendisk *disk); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); -- cgit v1.2.3 From 322cbb50de711814c42fb088f6d31901502c711a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:13 +0100 Subject: block: remove genhd.h There is no good reason to keep genhd.h separate from the main blkdev.h header that includes it. So fold the contents of genhd.h into blkdev.h and remove genhd.h entirely. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220124093913.742411-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 273 ++++++++++++++++++++++++++++++++++++++++++- include/linux/genhd.h | 287 ---------------------------------------------- include/linux/part_stat.h | 2 +- 3 files changed, 271 insertions(+), 291 deletions(-) delete mode 100644 include/linux/genhd.h (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..99a4384bb8a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions Copyright (C) 1992 Drew Eckhardt + */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H -#include -#include +#include +#include +#include #include #include #include @@ -12,11 +16,15 @@ #include #include #include +#include #include #include #include +#include #include #include +#include +#include struct module; struct request_queue; @@ -33,6 +41,10 @@ struct blk_queue_stats; struct blk_stat_callback; struct blk_crypto_profile; +extern const struct device_type disk_type; +extern struct device_type part_type; +extern struct class block_class; + /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -45,6 +57,144 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + +#define PARTITION_META_INFO_VOLNAMELTH 64 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) + +struct partition_meta_info { + char uuid[PARTITION_META_INFO_UUIDLTH]; + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. + * + */ +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; + +enum { + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ +}; + +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, +}; + +struct disk_events; +struct badblocks; + +struct blk_integrity { + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +struct gendisk { + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. 
+ */ + int major; + int first_minor; + int minors; + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ + + struct xarray part_tbl; + struct block_device *part0; + + const struct block_device_operations *fops; + struct request_queue *queue; + void *private_data; + + int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 +#define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ + + struct backing_dev_info *bdi; + struct kobject *slave_dir; +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif + struct timer_rand_state *random; + atomic_t sync_io; /* RAID */ + struct disk_events *ev; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct kobject integrity_kobj; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif + int node_id; + struct badblocks *bb; + struct lockdep_map lockdep_map; + u64 diskseq; +}; + +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return MKDEV(disk->major, disk->first_minor); +} + static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) @@ -596,6 +746,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) +{ + return device_add_disk(NULL, disk, NULL); +} +void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); +void set_disk_ro(struct gendisk *disk, bool read_only); +void disk_uevent(struct gendisk *disk, enum kobject_action action); + +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); +} + +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); + +void add_disk_randomness(struct gendisk *disk) __latent_entropy; +void rand_initialize_disk(struct gendisk *disk); + +static inline sector_t get_start_sect(struct block_device *bdev) +{ + return bdev->bd_start_sect; +} + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; +} + +static inline sector_t get_capacity(struct gendisk *disk) +{ + return bdev_nr_sectors(disk->part0); +} + +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return 
bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + +int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); +void put_disk(struct gendisk *disk); +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + static struct lock_class_key __key; \ + \ + __blk_alloc_disk(node_id, &__key); \ +}) +void blk_cleanup_disk(struct gendisk *disk); + +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) +void unregister_blkdev(unsigned int major, const char *name); + +bool bdev_check_media_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void set_capacity(struct gendisk *disk, sector_t size); + +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ + +dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk *disk); +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); @@ -1311,6 +1573,7 @@ void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1326,7 +1589,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -#endif +static inline void printk_all_partitions(void) +{ +} +#endif /* CONFIG_BLOCK */ + int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h deleted file mode 100644 index aa4bd985dbe5..000000000000 --- a/include/linux/genhd.h +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GENHD_H -#define _LINUX_GENHD_H - -/* - * genhd.h Copyright (C) 1992 Drew Eckhardt - * Generic hard disk header file by - * Drew Eckhardt - * - * - */ - -#include -#include -#include -#include -#include -#include - -extern const struct device_type disk_type; -extern struct device_type part_type; -extern struct class block_class; - -#define DISK_MAX_PARTS 256 -#define DISK_NAME_LEN 32 - -#define PARTITION_META_INFO_VOLNAMELTH 64 -/* - * Enough for the string representation of any kind of UUID plus NULL. - * EFI UUID is 36 characters. MSDOS UUID is 11 characters. 
- */ -#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) - -struct partition_meta_info { - char uuid[PARTITION_META_INFO_UUIDLTH]; - u8 volname[PARTITION_META_INFO_VOLNAMELTH]; -}; - -/** - * DOC: genhd capability flags - * - * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to - * removable media. When set, the device remains present even when media is not - * inserted. Shall not be set for devices which are removed entirely when the - * media is removed. - * - * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, - * doesn't appear in sysfs, and can't be opened from userspace or using - * blkdev_get*. Used for the underlying components of multipath devices. - * - * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not - * scan for partitions from add_disk, and users can't add partitions manually. - * - */ -enum { - GENHD_FL_REMOVABLE = 1 << 0, - GENHD_FL_HIDDEN = 1 << 1, - GENHD_FL_NO_PART = 1 << 2, -}; - -enum { - DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ - DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ -}; - -enum { - /* Poll even if events_poll_msecs is unset */ - DISK_EVENT_FLAG_POLL = 1 << 0, - /* Forward events to udev */ - DISK_EVENT_FLAG_UEVENT = 1 << 1, - /* Block event polling when open for exclusive write */ - DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, -}; - -struct disk_events; -struct badblocks; - -struct blk_integrity { - const struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -struct gendisk { - /* - * major/first_minor/minors should not be set by any new driver, the - * block core will take care of allocating them automatically. - */ - int major; - int first_minor; - int minors; - - char disk_name[DISK_NAME_LEN]; /* name of major driver */ - - unsigned short events; /* supported events */ - unsigned short event_flags; /* flags related to event processing */ - - struct xarray part_tbl; - struct block_device *part0; - - const struct block_device_operations *fops; - struct request_queue *queue; - void *private_data; - - int flags; - unsigned long state; -#define GD_NEED_PART_SCAN 0 -#define GD_READ_ONLY 1 -#define GD_DEAD 2 -#define GD_NATIVE_CAPACITY 3 - - struct mutex open_mutex; /* open/close mutex */ - unsigned open_partitions; /* number of open partitions */ - - struct backing_dev_info *bdi; - struct kobject *slave_dir; -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED - struct list_head slave_bdevs; -#endif - struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ - struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ -#if IS_ENABLED(CONFIG_CDROM) - struct cdrom_device_info *cdi; -#endif - int node_id; - struct badblocks *bb; - struct lockdep_map lockdep_map; - u64 diskseq; -}; - -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - -/* - * The gendisk is refcounted by the part0 block_device, and the bd_device - * therein is also used for device model presentation in sysfs. 
- */ -#define dev_to_disk(device) \ - (dev_to_bdev(device)->bd_disk) -#define disk_to_dev(disk) \ - (&((disk)->part0->bd_device)) - -#if IS_REACHABLE(CONFIG_CDROM) -#define disk_to_cdi(disk) ((disk)->cdi) -#else -#define disk_to_cdi(disk) NULL -#endif - -static inline dev_t disk_devt(struct gendisk *disk) -{ - return MKDEV(disk->major, disk->first_minor); -} - -void disk_uevent(struct gendisk *disk, enum kobject_action action); - -/* block/genhd.c */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline int __must_check add_disk(struct gendisk *disk) -{ - return device_add_disk(NULL, disk, NULL); -} -extern void del_gendisk(struct gendisk *gp); - -void invalidate_disk(struct gendisk *disk); - -void set_disk_ro(struct gendisk *disk, bool read_only); - -static inline int get_disk_ro(struct gendisk *disk) -{ - return disk->part0->bd_read_only || - test_bit(GD_READ_ONLY, &disk->state); -} - -static inline int bdev_read_only(struct block_device *bdev) -{ - return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); -} - -bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); - -/* drivers/char/random.c */ -extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -extern void rand_initialize_disk(struct gendisk *disk); - -static inline sector_t get_start_sect(struct block_device *bdev) -{ - return bdev->bd_start_sect; -} - -static inline sector_t bdev_nr_sectors(struct block_device *bdev) -{ - return bdev->bd_nr_sectors; -} - -static inline loff_t bdev_nr_bytes(struct block_device *bdev) -{ - return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; -} - -static inline sector_t get_capacity(struct gendisk *disk) -{ - return bdev_nr_sectors(disk->part0); -} - -static inline u64 sb_bdev_nr_blocks(struct super_block *sb) -{ - return bdev_nr_sectors(sb->s_bdev) >> - (sb->s_blocksize_bits - SECTOR_SHIFT); -} - -int bdev_disk_changed(struct gendisk *disk, bool invalidate); - -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); -extern void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); - -/** - * blk_alloc_disk - allocate a gendisk structure - * @node_id: numa node to allocate on - * - * Allocate and pre-initialize a gendisk structure for use with BIO based - * drivers. 
- * - * Context: can sleep - */ -#define blk_alloc_disk(node_id) \ -({ \ - static struct lock_class_key __key; \ - \ - __blk_alloc_disk(node_id, &__key); \ -}) -void blk_cleanup_disk(struct gendisk *disk); - -int __register_blkdev(unsigned int major, const char *name, - void (*probe)(dev_t devt)); -#define register_blkdev(major, name) \ - __register_blkdev(major, name, NULL) -void unregister_blkdev(unsigned int major, const char *name); - -bool bdev_check_media_change(struct block_device *bdev); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void set_capacity(struct gendisk *disk, sector_t size); - -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED -int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} -#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ - -dev_t part_devt(struct gendisk *disk, u8 partno); -void inc_diskseq(struct gendisk *disk); -dev_t blk_lookup_devt(const char *name, int partno); -void blk_request_module(dev_t devt); -#ifdef CONFIG_BLOCK -void printk_all_partitions(void); -#else /* CONFIG_BLOCK */ -static inline void printk_all_partitions(void) -{ -} -#endif /* CONFIG_BLOCK */ - -#endif /* _LINUX_GENHD_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 6f7949b2fd8d..abeba356bc3f 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -2,7 +2,7 @@ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H -#include +#include #include struct disk_stats { -- cgit v1.2.3 From 0a3140ea0fae377c9eaa031b7db1670ae422ed47 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 24 Jan 2022 10:11:02 +0100 Subject: block: pass a block_device and opf to blk_next_bio All callers need to set the block_device and operation, so lift that into the common code. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 117d7f248ac9..edeae54074ed 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -790,6 +790,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); +struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, + unsigned int nr_pages, unsigned int opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ -- cgit v1.2.3 From 609be1066731fea86436f5f91022f82e592ab456 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:03 +0100 Subject: block: pass a block_device and opf to bio_alloc_bioset Pass the block_device and operation that we plan to use this bio for to bio_alloc_bioset to optimize the assigment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. 
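As a rough illustration of the new calling convention (a sketch only, not part of the patch; the call site, GFP/REQ_OP choices and bio_set below are hypothetical), a caller that previously passed only the gfp mask, vector count and bio_set and then set the device and operation by hand can now do it in one step:

	/* Hypothetical caller converted to the new bio_alloc_bioset() signature. */
	static struct bio *example_alloc_write_bio(struct block_device *bdev,
						   unsigned short nr_vecs,
						   struct bio_set *bs)
	{
		/* Previously: bio_alloc_bioset(GFP_NOIO, nr_vecs, bs); followed by
		 * bio_set_dev(bio, bdev); and a manual bi_opf assignment.
		 */
		return bio_alloc_bioset(bdev, nr_vecs, REQ_OP_WRITE, GFP_NOIO, bs);
	}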
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index edeae54074ed..2f63ae9a71e1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,8 +405,9 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); -struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs, - struct bio_set *bs); +struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, + unsigned int opf, gfp_t gfp_mask, + struct bio_set *bs); struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); @@ -419,7 +420,7 @@ extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); + return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); -- cgit v1.2.3 From b77c88c2100ce6a5ec8126c13599b5a7f6663e32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:04 +0100 Subject: block: pass a block_device and opf to bio_alloc_kiocb Pass the block_device and operation that we plan to use this bio for to bio_alloc_kiocb to optimize the assigment. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2f63ae9a71e1..5c5ada2ebb27 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,8 +408,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask, struct bio_set *bs); -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, - struct bio_set *bs); +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -- cgit v1.2.3 From 07888c665b405b1cd3577ddebfeb74f4717a84c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:05 +0100 Subject: block: pass a block_device and opf to bio_alloc Pass the block_device and operation that we plan to use this bio for to bio_alloc to optimize the assignment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. 
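A minimal sketch of a converted bio_alloc() call site follows (illustrative only; the function name and sector argument are made up for the example):

	/* Hypothetical read path using the new bio_alloc() signature. */
	static struct bio *example_alloc_read_bio(struct block_device *bdev,
						  sector_t sector)
	{
		/* The device and REQ_OP_READ are passed directly to bio_alloc()
		 * instead of being assigned after allocation.
		 */
		struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);

		if (bio)
			bio->bi_iter.bi_sector = sector;
		return bio;
	}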
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-18-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5c5ada2ebb27..be6ac92913d4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -418,9 +418,10 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) +static inline struct bio *bio_alloc(struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) { - return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); + return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); -- cgit v1.2.3 From 49add4966d79244013fce35f95c6833fae82b8b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:06 +0100 Subject: block: pass a block_device and opf to bio_init Pass the block_device that we plan to use this bio for and the operation to bio_init to optimize the assignment. A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-19-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index be6ac92913d4..41bedf727f59 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -456,8 +456,8 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_init(struct bio *bio, struct bio_vec *table, - unsigned short max_vecs); +void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, + unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From a7c50c940477bae89fb2b4f51bd969a2d95d7512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:07 +0100 Subject: block: pass a block_device and opf to bio_reset Pass the block_device that we plan to use this bio for and the operation to bio_reset to optimize the assigment. A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. 
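For illustration, reusing a bio under the new bio_reset() signature might look like the sketch below (not code from this patch; the surrounding function and REQ_OP_READ choice are hypothetical):

	/* Hypothetical caller of the new bio_reset() signature. */
	static void example_reuse_bio(struct bio *bio, struct block_device *bdev)
	{
		/* The device and operation are handed to bio_reset() directly
		 * rather than being reassigned afterwards.
		 */
		bio_reset(bio, bdev, REQ_OP_READ);
	}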
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-20-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 41bedf727f59..18cfe5bb41ea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,7 +459,7 @@ extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); -extern void bio_reset(struct bio *); +void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -517,13 +517,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) bio_associate_blkg(bio); } -static inline void bio_copy_dev(struct bio *dst, struct bio *src) -{ - bio_clear_flag(dst, BIO_REMAPPED); - dst->bi_bdev = src->bi_bdev; - bio_clone_blkg_association(dst, src); -} - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- cgit v1.2.3 From b1f866b013e6e5583f2f0bf4a61d13eddb9a1799 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:48 +0100 Subject: block: remove blk_needs_flush_plug blk_needs_flush_plug fails to account for the cb_list, which needs flushing as well. Remove it and just check if there is a plug instead of poking into the internals of the plug structure. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127070549.1377856-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99a4384bb8a5..f902a1c2fac0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1055,14 +1055,6 @@ extern void blk_finish_plug(struct blk_plug *); void blk_flush_plug(struct blk_plug *plug, bool from_schedule); -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - return plug && - (plug->mq_list || !list_empty(&plug->cb_list)); -} - int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1086,11 +1078,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; -- cgit v1.2.3 From aa8dcccaf32bfdc09f2aff089d5d60c37da5b7b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:49 +0100 Subject: block: check that there is a plug in blk_flush_plug Rename blk_flush_plug to __blk_flush_plug and add a wrapper that includes the NULL check instead of open coding that check everywhere. 
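The effect on callers can be sketched as follows (hypothetical call site for illustration; the wrapper itself is shown in the diff below):

	/* Before: every caller had to guard against a NULL plug. */
	if (current->plug)
		blk_flush_plug(current->plug, false);

	/* After: the inline wrapper performs the NULL check. */
	blk_flush_plug(current->plug, false);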
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220127070549.1377856-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f902a1c2fac0..654163d3b903 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1053,7 +1053,12 @@ extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule); +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); +static inline void blk_flush_plug(struct blk_plug *plug, bool async) +{ + if (plug) + __blk_flush_plug(plug, async); +} int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); -- cgit v1.2.3 From b42c1fc3d55e077d36718ad9800d89100b2aff81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 07:41:25 +0100 Subject: block: fix the kerneldoc for bio_end_io_acct Document the actually existing parameter name. Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127064125.1314347-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 654163d3b903..3bfc75a2a450 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1520,7 +1520,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for - * @start: start time returned by bio_start_io_acct() + * @start_time: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { -- cgit v1.2.3 From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", the offset of all subsequent members of struct smc_diag_lgrinfo changed as well. As a result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is the SMC_GEN_NETLINK interface, which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. 
Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { -- cgit v1.2.3 From e1d2699b96793d19388e302fa095e0da2c145701 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Jan 2022 22:10:52 -0500 Subject: NFS: Avoid duplicate uncached readdir calls on eof If we've reached the end of the directory, then cache that information in the context so that we don't need to do an uncached readdir in order to rediscover that fact. Fixes: 794092c57f89 ("NFS: Do uncached readdir when we're seeking a cookie in an empty page cache") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 02aa49323d1d..68f81d8d36de 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -107,6 +107,7 @@ struct nfs_open_dir_context { __u64 dup_cookie; pgoff_t page_index; signed char duped; + bool eof; }; /* -- cgit v1.2.3 From 2ea88716369ac9a7486a8cb309d6bf1239ea156c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 23 Jan 2022 17:27:47 +0100 Subject: libceph: make recv path in secure mode work the same as send path The recv path of secure mode is intertwined with that of crc mode. While it's slightly more efficient that way (the ciphertext is read into the destination buffer and decrypted in place, thus avoiding two potentially heavy memory allocations for the bounce buffer and the corresponding sg array), it isn't really amenable to changes. Sacrifice that edge and align with the send path which always uses a full-sized bounce buffer (currently there is no other way -- if the kernel crypto API ever grows support for streaming (piecewise) en/decryption for GCM [1], we would be able to easily take advantage of that on both sides). 
[1] https://lore.kernel.org/all/20141225202830.GA18794@gondor.apana.org.au/ Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- include/linux/ceph/messenger.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ff99ce094cfa..6c6b6ea52bb8 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -383,6 +383,10 @@ struct ceph_connection_v2_info { struct ceph_gcm_nonce in_gcm_nonce; struct ceph_gcm_nonce out_gcm_nonce; + struct page **in_enc_pages; + int in_enc_page_cnt; + int in_enc_resid; + int in_enc_i; struct page **out_enc_pages; int out_enc_page_cnt; int out_enc_resid; -- cgit v1.2.3 From 038b8d1d1ab1cce11a158d30bf080ff41a2cfd15 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 30 Dec 2021 15:13:32 +0100 Subject: libceph: optionally use bounce buffer on recv path in crc mode Both msgr1 and msgr2 in crc mode are zero copy in the sense that message data is read from the socket directly into the destination buffer. We assume that the destination buffer is stable (i.e. remains unchanged while it is being read to) though. Otherwise, CRC errors ensue: libceph: read_partial_message 0000000048edf8ad data crc 1063286393 != exp. 228122706 libceph: osd1 (1)192.168.122.1:6843 bad crc/signature libceph: bad data crc, calculated 57958023, expected 1805382778 libceph: osd2 (2)192.168.122.1:6876 integrity error, bad crc Introduce rxbounce option to enable use of a bounce buffer when receiving message data. In particular this is needed if a mapped image is a Windows VM disk, passed to QEMU. Windows has a system-wide "dummy" page that may be mapped into the destination buffer (potentially more than once into the same buffer) by the Windows Memory Manager in an effort to generate a single large I/O [1][2]. QEMU makes a point of preserving overlap relationships when cloning I/O vectors, so krbd gets exposed to this behaviour. [1] "What Is Really in That MDL?" 
https://docs.microsoft.com/en-us/previous-versions/windows/hardware/design/dn614012(v=vs.85) [2] https://blogs.msmvps.com/kernelmustard/2005/05/04/dummy-pages/ URL: https://bugzilla.redhat.com/show_bug.cgi?id=1973317 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6a89ea410e43..edf62eaa6285 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -35,6 +35,7 @@ #define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */ +#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 6c6b6ea52bb8..e7f2fb2fc207 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -461,6 +461,7 @@ struct ceph_connection { struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ + struct page *bounce_page; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ -- cgit v1.2.3 From 043cfff99a18933fda2fb2e163daee73cc07910b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 23 Dec 2021 17:23:00 +0100 Subject: firmware: ti_sci: Fix compilation failure when CONFIG_TI_SCI_PROTOCOL is not defined Remove an extra ";" which breaks compilation. Fixes: 53bf2b0e4e4c ("firmware: ti_sci: Add support for getting resource with subtype") Signed-off-by: Christophe JAILLET Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/e6c3cb793e1a6a2a0ae2528d5a5650dfe6a4b6ff.1640276505.git.christophe.jaillet@wanadoo.fr --- include/linux/soc/ti/ti_sci_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 0aad7009b50e..bd0d11af76c5 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, static inline struct ti_sci_resource * devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, - u32 dev_id, u32 sub_type); + u32 dev_id, u32 sub_type) { return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From bfb1a7c91fb7758273b4a8d735313d9cc388b502 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 2 Feb 2022 12:55:53 -0800 Subject: x86/bug: Merge annotate_reachable() into _BUG_FLAGS() asm In __WARN_FLAGS(), we had two asm statements (abbreviated): asm volatile("ud2"); asm volatile(".pushsection .discard.reachable"); These pair of statements are used to trigger an exception, but then help objtool understand that for warnings, control flow will be restored immediately afterwards. The problem is that volatile is not a compiler barrier. GCC explicitly documents this: > Note that the compiler can move even volatile asm instructions > relative to other code, including across jump instructions. Also, no clobbers are specified to prevent instructions from subsequent statements from being scheduled by compiler before the second asm statement. 
This can lead to instructions from subsequent statements being emitted by the compiler before the second asm statement. Providing a scheduling model such as via -march= options enables the compiler to better schedule instructions with known latencies to hide latencies from data hazards compared to inline asm statements in which latencies are not estimated. If an instruction gets scheduled by the compiler between the two asm statements, then objtool will think that it is not reachable, producing a warning. To prevent instructions from being scheduled in between the two asm statements, merge them. Also remove an unnecessary unreachable() asm annotation from BUG() in favor of __builtin_unreachable(). objtool is able to track that the ud2 from BUG() terminates control flow within the function. Link: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile Link: https://github.com/ClangBuiltLinux/linux/issues/1483 Signed-off-by: Nick Desaulniers Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220202205557.2260694-1-ndesaulniers@google.com --- include/linux/compiler.h | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 429dcebe2b99..0f7fd205ab7e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -117,14 +117,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -133,24 +125,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else -#define annotate_reachable() #define annotate_unreachable() +# define ASM_REACHABLE #define __annotate_jump_table #endif -#ifndef ASM_UNREACHABLE -# define ASM_UNREACHABLE -#endif #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ -- cgit v1.2.3 From 4a81f6da9cb2d1ef911131a6fd8bd15cb61fc772 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 1 Feb 2022 20:39:42 +0100 Subject: net, neigh: Do not trigger immediate probes on NUD_FAILED from neigh_managed_work syzkaller was able to trigger a deadlock for NTF_MANAGED entries [0]: kworker/0:16/14617 is trying to acquire lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 [...] but task is already holding lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: neigh_managed_work+0x35/0x250 net/core/neighbour.c:1572 The neighbor entry turned to NUD_FAILED state, where __neigh_event_send() triggered an immediate probe as per commit cd28ca0a3dd1 ("neigh: reduce arp latency") via neigh_probe() given table lock was held. 
One option to fix this situation is to defer the neigh_probe() back to the neigh_timer_handler() similarly as pre cd28ca0a3dd1. For the case of NTF_MANAGED, this deferral is acceptable given this only happens on actual failure state and regular / expected state is NUD_VALID with the entry already present. The fix adds a parameter to __neigh_event_send() in order to communicate whether immediate probe is allowed or disallowed. Existing call-sites of neigh_event_send() default as-is to immediate probe. However, the neigh_managed_work() disables it via use of neigh_event_send_probe(). [0] __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain kernel/locking/lockdep.c:3788 [inline] __lock_acquire.cold+0x149/0x3ab kernel/locking/lockdep.c:5027 lock_acquire kernel/locking/lockdep.c:5639 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5604 __raw_write_lock_bh include/linux/rwlock_api_smp.h:202 [inline] _raw_write_lock_bh+0x2f/0x40 kernel/locking/spinlock.c:334 ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 ip6_finish_output2+0x1070/0x14f0 net/ipv6/ip6_output.c:123 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170 ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0xa99/0x17f0 net/ipv6/ndisc.c:508 ndisc_send_ns+0x3a9/0x840 net/ipv6/ndisc.c:650 ndisc_solicit+0x2cd/0x4f0 net/ipv6/ndisc.c:742 neigh_probe+0xc2/0x110 net/core/neighbour.c:1040 __neigh_event_send+0x37d/0x1570 net/core/neighbour.c:1201 neigh_event_send include/net/neighbour.h:470 [inline] neigh_managed_work+0x162/0x250 net/core/neighbour.c:1574 process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307 worker_thread+0x657/0x1110 kernel/workqueue.c:2454 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: 7482e3841d52 ("net, neigh: Add NTF_MANAGED flag for managed neighbor entries") Reported-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Roopa Prabhu Tested-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220201193942.5055-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 937389e04c8e..87419f7f5421 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -350,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -460,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int 
neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { -- cgit v1.2.3 From 4564661af6ee321942ec1ab012191d7adedd3e00 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Jan 2022 11:17:05 -0800 Subject: xen: xenbus_dev.h: delete incorrect file name It is better/preferred not to include file names in source files because (a) they are not needed and (b) they can be incorrect, so just delete this incorrect file name. Signed-off-by: Randy Dunlap Reviewed-by: Juergen Gross Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: xen-devel@lists.xenproject.org Link: https://lore.kernel.org/r/20220130191705.24971-1-rdunlap@infradead.org Signed-off-by: Juergen Gross --- include/xen/xenbus_dev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h index bbee8c6a349d..4dc45a51c044 100644 --- a/include/xen/xenbus_dev.h +++ b/include/xen/xenbus_dev.h @@ -1,6 +1,4 @@ /****************************************************************************** - * evtchn.h - * * Interface to /dev/xen/xenbus_backend. * * Copyright (c) 2011 Bastian Blank -- cgit v1.2.3 From 164666fa66669d437bdcc8d5f1744a2aee73be41 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Mon, 31 Jan 2022 12:23:07 -0500 Subject: Improve docs for IOCTL_GNTDEV_MAP_GRANT_REF The current implementation of gntdev guarantees that the first call to IOCTL_GNTDEV_MAP_GRANT_REF will set @index to 0. This is required to use gntdev for Wayland, which is a future desire of Qubes OS. Additionally, requesting zero grants results in an error, but this was not documented either. Document both of these. Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/f66c5a4e-2034-00b5-a635-6983bd999c07@gmail.com Signed-off-by: Juergen Gross --- include/uapi/xen/gntdev.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 9ac5515b9bc2..7a7145395c09 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. 
@index should be + * considered opaque to userspace, with one exception: if no grant + * references have ever been inserted into the mapping table of this + * instance, @index will be set to 0. This is necessary to use gntdev + * with userspace APIs that expect a file descriptor that can be + * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this + * ioctl will fail. */ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -- cgit v1.2.3 From 52cc6ffc0ab2c61a76127b9347567fc97c15582f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 31 Jan 2022 08:40:01 -0800 Subject: page_pool: Refactor page_pool to enable fragmenting after allocation This change is meant to permit a driver to perform "fragmenting" of the page from within the driver instead of the current model which requires pre-partitioning the page. The main motivation behind this is to support use cases where the page will be split up by the driver after DMA instead of before. With this change it becomes possible to start using page pool to replace some of the existing use cases where multiple references were being used for a single page, but the number needed was unknown as the size could be dynamic. For example, with this code it would be possible to do something like the following to handle allocation: page = page_pool_alloc_pages(); if (!page) return NULL; page_pool_fragment_page(page, DRIVER_PAGECNT_BIAS_MAX); rx_buf->page = page; rx_buf->pagecnt_bias = DRIVER_PAGECNT_BIAS_MAX; Then we would process a received buffer by handling it with: rx_buf->pagecnt_bias--; Once the page has been fully consumed we could then flush the remaining instances with: if (page_pool_defrag_page(page, rx_buf->pagecnt_bias)) continue; page_pool_put_defragged_page(pool, page, -1, !!budget); The general idea is that we want to have the ability to allocate a page with excess fragment count and then trim off the unneeded fragments. Signed-off-by: Alexander Duyck Reviewed-by: Ilias Apalodimas Signed-off-by: David S. Miller --- include/net/page_pool.h | 82 +++++++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 79a805542d0f..97c3c19872ff 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -201,21 +201,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct); +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); -/* Same as above but will try to sync the entire area pool->max_len */ -static inline void page_pool_put_full_page(struct page_pool *pool, - struct page *page, bool allow_direct) +static inline void page_pool_fragment_page(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_defrag_page(struct page *page, long nr) +{ + long ret; + + /* If nr == pp_frag_count then we have cleared all remaining + * references to the page. No need to actually overwrite it, instead + * we can leave this to be overwritten by the calling function. + * + * The main advantage to doing this is that an atomic_read is + * generally a much cheaper operation than an atomic update, + * especially when dealing with a page that may be partitioned + * into only 2 or 3 pieces. 
+ */ + if (atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; +} + +static inline bool page_pool_is_last_frag(struct page_pool *pool, + struct page *page) +{ + /* If fragments aren't enabled or count is 0 we were the last user */ + return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + (page_pool_defrag_page(page, 1) == 0); +} + +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, + unsigned int dma_sync_size, + bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - page_pool_put_page(pool, page, -1, allow_direct); + if (!page_pool_is_last_frag(pool, page)) + return; + + page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); #endif } +/* Same as above but will try to sync the entire area pool->max_len */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + /* Same as above but the caller must guarantee safe context. e.g NAPI */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) @@ -243,30 +289,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } -static inline void page_pool_set_frag_count(struct page *page, long nr) -{ - atomic_long_set(&page->pp_frag_count, nr); -} - -static inline long page_pool_atomic_sub_frag_count_return(struct page *page, - long nr) -{ - long ret; - - /* As suggested by Alexander, atomic_long_read() may cover up the - * reference count errors, so avoid calling atomic_long_read() in - * the cases of freeing or draining the page_frags, where we would - * not expect it to match or that are slowpath anyway. - */ - if (__builtin_constant_p(nr) && - atomic_long_read(&page->pp_frag_count) == nr) - return 0; - - ret = atomic_long_sub_return(nr, &page->pp_frag_count); - WARN_ON(ret < 0); - return ret; -} - static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL -- cgit v1.2.3 From 7af4a361a62f59b44ec301f21090823365dd0244 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 3 Feb 2022 11:16:53 +0100 Subject: net: dsa: mv88e6xxx: Improve isolation of standalone ports Clear MapDA on standalone ports to bypass any ATU lookup that might point the packet in the wrong direction. This means that all packets are flooded using the PVT config. So make sure that standalone ports are only allowed to communicate with the local upstream port. Here is a scenario in which this is needed: CPU | .----. .---0---. | .--0--. | sw0 | | | sw1 | '-1-2-3-' | '-1-2-' '---' - sw0p1 and sw1p1 are bridged - sw0p2 and sw1p2 are in standalone mode - Learning must be enabled on sw0p3 in order for hardware forwarding to work properly between bridged ports 1. A packet with SA :aa comes in on sw1p2 1a. Egresses sw1p0 1b. Ingresses sw0p3, ATU adds an entry for :aa towards port 3 1c. Egresses sw0p0 2. A packet with DA :aa comes in on sw0p2 2a. If an ATU lookup is done at this point, the packet will be incorrectly forwarded towards sw0p3. With this change in place, the ATU is bypassed and the packet is forwarded in accordance with the PVT, which only contains the CPU port. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/net/dsa.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 43c4153ef53a..6e5ef62a7dce 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -591,6 +591,18 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) return port == dsa_upstream_port(ds, port); } +/* Return the local port used to reach the CPU port */ +static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) +{ + struct dsa_port *dp; + + dsa_switch_for_each_available_port(dp, ds) { + return dsa_upstream_port(ds, dp->index); + } + + return ds->num_ports; +} + /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. -- cgit v1.2.3 From d352b20f4174a6bd998992329b773ab513232880 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 3 Feb 2022 11:16:56 +0100 Subject: net: dsa: mv88e6xxx: Improve multichip isolation of standalone ports Given that standalone ports are now configured to bypass the ATU and forward all frames towards the upstream port, extend the ATU bypass to multichip systems. Load VID 0 (standalone) into the VTU with the policy bit set. Since VID 4095 (bridged) is already loaded, we now know that all VIDs in use are always available in all VTUs. Therefore, we can safely enable 802.1Q on DSA ports. Setting the DSA ports' VTU policy to TRAP means that all incoming frames on VID 0 will be classified as MGMT - as a result, the ATU is bypassed on all subsequent switches. With this isolation in place, we are able to support configurations that are simultaneously very quirky and very useful. Quirky because it involves looping cables between local switchports like in this example: CPU | .------. .---0---. | .----0----. | sw0 | | | sw1 | '-1-2-3-' | '-1-2-3-4-' $ @ '---' $ @ % % We have three physically looped pairs ($, @, and %). This is very useful because it allows us to run the kernel's kselftests for the bridge on mv88e6xxx hardware. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6e5ef62a7dce..ca8c14b547b4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -591,6 +591,12 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) return port == dsa_upstream_port(ds, port); } +/* Return true if this is a DSA port leading away from the CPU */ +static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) +{ + return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); +} + /* Return the local port used to reach the CPU port */ static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) { -- cgit v1.2.3 From e85c81ba8859a4c839bcd69c5d83b32954133a5b Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Mon, 17 Jan 2022 17:36:54 +0800 Subject: ext4: fast commit may not fallback for ineligible commit For the following scenario: 1. jbd starts committing transaction n 2. task A gets a new handle for transaction n+1 3. task A does some ineligible actions and marks FC_INELIGIBLE 4. jbd completes transaction n and clears FC_INELIGIBLE 5. task A calls fsync In this case fast commit will not fall back to a full commit, and transaction n+1 is also not handled by jbd. 
Make ext4_fc_mark_ineligible() also record the transaction tid for the latest ineligible case. When ext4_fc_cleanup() is called, check the current transaction tid; if it is smaller than the latest ineligible tid, do not clear EXT4_MF_FC_INELIGIBLE. Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-by: Ritesh Harjani Suggested-by: Harshad Shirwadkar Signed-off-by: Xin Yin Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c45281a..d63b8106796e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1295,7 +1295,7 @@ struct journal_s * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ - void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: -- cgit v1.2.3 From 3ca40c0d329113a9f76f6aa01abe73d9f16ace9d Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 17 Jan 2022 17:41:50 +0530 Subject: jbd2: cleanup unused functions declarations from jbd2.h During code review, no references were found for a few of the function declarations below. This patch cleans those up from jbd2.h Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/30d1fc327becda197a4136cf9cdc73d9baa3b7b9.1642416995.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d63b8106796e..afc5572e7b8a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1419,9 +1419,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); @@ -1486,9 +1484,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr); -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); extern int __init jbd2_journal_init_transaction_cache(void); @@ -1774,8 +1769,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -extern int jbd_blocks_per_page(struct inode *inode); /* JBD uses a CRC32 checksum */ #define JBD_MAX_CHECKSUM_SIZE 4 -- cgit v1.2.3 From 4f98186848707f530669238d90e0562d92a78aab Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 17 Jan 2022 17:41:51 +0530 Subject: jbd2: refactor wait logic for transaction updates into a common function No functionality change as such in this patch. 
This only refactors the common piece of code which waits for t_updates to finish into a common function named as jbd2_journal_wait_updates(journal_t *) Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/8c564f70f4b2591171677a2a74fccb22a7b6c3a4.1642416995.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index afc5572e7b8a..9c3ada74ffb1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -594,7 +594,7 @@ struct transaction_s */ unsigned long t_log_start; - /* + /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ @@ -1538,6 +1538,8 @@ extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *); +void jbd2_journal_wait_updates(journal_t *); + extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int bsize); -- cgit v1.2.3 From bb747becf8084ebbbb8986f7927057034d5c3329 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Mon, 24 Jan 2022 16:48:14 +0530 Subject: dt-bindings: Add headers for Tegra234 I2C Add dt-bindings header files for I2C controllers for Tegra234 Signed-off-by: Akhil R Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 19 ++++++++++++++++++- include/dt-bindings/reset/tegra234-reset.h | 8 ++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 8d7e66e1b6ef..dc524e6e8d66 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -20,6 +20,24 @@ #define TEGRA234_CLK_EMC 31U /** @brief output of gate CLK_ENB_FUSE */ #define TEGRA234_CLK_FUSE 40U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C1 */ +#define TEGRA234_CLK_I2C1 48U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C2 */ +#define TEGRA234_CLK_I2C2 49U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C3 */ +#define TEGRA234_CLK_I2C3 50U +/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C4 */ +#define TEGRA234_CLK_I2C4 51U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C6 */ +#define TEGRA234_CLK_I2C6 52U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C7 */ +#define TEGRA234_CLK_I2C7 53U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C8 */ +#define TEGRA234_CLK_I2C8 54U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C9 */ +#define TEGRA234_CLK_I2C9 55U +/** @brief PLLP clk output */ +#define TEGRA234_CLK_PLLP_OUT0 102U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ #define TEGRA234_CLK_SDMMC4 123U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ @@ -30,5 +48,4 @@ #define TEGRA234_CLK_PLLC4 237U /** @brief 32K input clock provided by PMIC */ #define TEGRA234_CLK_CLK_32K 289U - #endif diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index 50e13bced642..29632598fe11 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -10,6 +10,14 @@ * 
@brief Identifiers for Resets controllable by firmware * @{ */ +#define TEGRA234_RESET_I2C1 24U +#define TEGRA234_RESET_I2C2 29U +#define TEGRA234_RESET_I2C3 30U +#define TEGRA234_RESET_I2C4 31U +#define TEGRA234_RESET_I2C6 32U +#define TEGRA234_RESET_I2C7 33U +#define TEGRA234_RESET_I2C8 34U +#define TEGRA234_RESET_I2C9 35U #define TEGRA234_RESET_SDMMC4 85U #define TEGRA234_RESET_UARTA 100U -- cgit v1.2.3 From 38eb21a5fcd2ae482c8377ccb75c265037ef538f Mon Sep 17 00:00:00 2001 From: Akhil R Date: Mon, 24 Jan 2022 16:48:16 +0530 Subject: dt-bindings: Add headers for Tegra234 PWM Add dt-bindings header files for PWM of Tegra234 Signed-off-by: Akhil R Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 16 ++++++++++++++++ include/dt-bindings/reset/tegra234-reset.h | 8 ++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index dc524e6e8d66..2529e7ec0bf4 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -38,6 +38,22 @@ #define TEGRA234_CLK_I2C9 55U /** @brief PLLP clk output */ #define TEGRA234_CLK_PLLP_OUT0 102U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM1 */ +#define TEGRA234_CLK_PWM1 105U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM2 */ +#define TEGRA234_CLK_PWM2 106U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM3 */ +#define TEGRA234_CLK_PWM3 107U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM4 */ +#define TEGRA234_CLK_PWM4 108U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM5 */ +#define TEGRA234_CLK_PWM5 109U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM6 */ +#define TEGRA234_CLK_PWM6 110U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM7 */ +#define TEGRA234_CLK_PWM7 111U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM8 */ +#define TEGRA234_CLK_PWM8 112U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ #define TEGRA234_CLK_SDMMC4 123U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index 29632598fe11..ba390b86361d 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -18,6 +18,14 @@ #define TEGRA234_RESET_I2C7 33U #define TEGRA234_RESET_I2C8 34U #define TEGRA234_RESET_I2C9 35U +#define TEGRA234_RESET_PWM1 68U +#define TEGRA234_RESET_PWM2 69U +#define TEGRA234_RESET_PWM3 70U +#define TEGRA234_RESET_PWM4 71U +#define TEGRA234_RESET_PWM5 72U +#define TEGRA234_RESET_PWM6 73U +#define TEGRA234_RESET_PWM7 74U +#define TEGRA234_RESET_PWM8 75U #define TEGRA234_RESET_SDMMC4 85U #define TEGRA234_RESET_UARTA 100U -- cgit v1.2.3 From e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 27 Jan 2022 11:34:53 -0800 Subject: thermal: netlink: Add a new event to notify CPU capabilities change Add a new netlink event to notify change in CPU capabilities in terms of performance and efficiency. Firmware may change CPU capabilities as a result of thermal events in the system or to account for changes in the TDP (thermal design power) level. 
This notification type will allow user space to avoid running workloads on certain CPUs or proactively adjust power limits to avoid future events. The netlink message consists of a nested attribute (THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes: * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32): -- logical CPU number * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32): -- Scaled performance from 0-1023 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32): -- Scaled efficiency from 0-1023 Reviewed-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 9aa2fedfa309..fc78bf3aead7 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -44,7 +44,10 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, - + THERMAL_GENL_ATTR_CPU_CAPABILITY, + THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -71,6 +74,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) -- cgit v1.2.3 From 67d6212afda218d564890d1674bab28e8612170f Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jan 2022 15:39:53 -0800 Subject: Revert "module, async: async_synchronize_full() on module init iff async is used" This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. 
Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove the PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f5b2be39a78c..75ba8aa60248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1680,7 +1680,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ -- cgit v1.2.3 From 22f56b8e890d4e2835951b437bb6eeebfd1cb18b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 3 Feb 2022 16:01:39 -0500 Subject: XArray: Include bitmap.h from xarray.h xas_find_chunk() calls find_next_bit(), which is defined in find.h, included from bitmap.h. Inside the kernel, this isn't a problem because bitmap.h is included from cpumask.h which is dragged in (eventually) by gfp.h. When building the test-suite, that doesn't happen, so we need to include bitmap.h explicitly. Fixes: 4ade0818cf04 ("tools: sync tools/bitmap with mother linux") Reported-by: Liam Howlett Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index d6d5da6ed735..66e28bc1a023 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -9,6 +9,7 @@ * See Documentation/core-api/xarray.rst for how to use the XArray. */ +#include <linux/bitmap.h> #include <linux/bug.h> #include <linux/compiler.h> #include <linux/gfp.h> -- cgit v1.2.3 From 00edb2bac29f2e9c1674e85479fa5e3aa8cc648f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 10:55:44 -0800 Subject: i40e: remove enum i40e_client_state It's not used. 
Signed-off-by: Jakub Kicinski Reviewed-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- include/linux/net/intel/i40e_client.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index 6b3267b49755..ed42bd5f639f 100644 --- a/include/linux/net/intel/i40e_client.h +++ b/include/linux/net/intel/i40e_client.h @@ -26,11 +26,6 @@ struct i40e_client_version { u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED -}; - enum i40e_client_instance_state { __I40E_CLIENT_INSTANCE_NONE, __I40E_CLIENT_INSTANCE_OPENED, @@ -190,11 +185,6 @@ struct i40e_client { const struct i40e_client_ops *ops; /* client ops provided by the client */ }; -static inline bool i40e_client_is_registered(struct i40e_client *client) -{ - return test_bit(__I40E_CLIENT_REGISTERED, &client->state); -} - void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client); void i40e_client_device_unregister(struct i40e_info *ldev); -- cgit v1.2.3 From 3a99f121fe0bfa4b65ff74d9e980018caf54c2d4 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 27 Jan 2022 18:55:01 -0800 Subject: firmware: qcom: scm: Introduce pas_metadata context Starting with Qualcomm SM8450, some new security enhancements have been made in the secure world, which results in the requirement to keep the metadata segment accessible by the secure world from init_image() until auth_and_reset(). Introduce a "PAS metadata context" object that can be passed to init_image() for tracking the mapped memory and a related release function for client drivers to release the mapping once either auth_and_reset() has been invoked or in error handling paths on the way there. Signed-off-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220128025513.97188-2-bjorn.andersson@linaro.org --- include/linux/qcom_scm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index ca4a88d7cbdc..681748619890 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -68,8 +68,16 @@ extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); extern void qcom_scm_cpu_power_down(u32 flags); extern int qcom_scm_set_remote_state(u32 state, u32 id); +struct qcom_scm_pas_metadata { + void *ptr; + dma_addr_t phys; + ssize_t size; +}; + extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size); + size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -- cgit v1.2.3 From 8bd42e2341a7857010001f08ee1729ced3b0e394 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 27 Jan 2022 18:55:03 -0800 Subject: soc: qcom: mdt_loader: Allow hash segment to be split out It's been observed that some firmware found in a Qualcomm SM8450 device has the hash table in a separate .bNN file. Use the newly extracted helper function to load this segment from the separate file, if it's determined that the hashes are not part of the already loaded firmware. In order to do this, the function needs access to the firmware basename and, to provide more useful error messages, a struct device to associate the errors with. 
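For illustration only (not part of this patch): a remoteproc-style caller might use the extended helper roughly as below. The firmware name string, the local variable names and the surrounding error handling are assumptions; only the qcom_mdt_read_metadata() prototype comes from this series.

	size_t metadata_len;
	void *metadata;

	/* fw is the already-requested .mdt firmware; the helper can now locate a
	 * split-out hash segment on its own and report errors against dev.
	 */
	metadata = qcom_mdt_read_metadata(fw, &metadata_len, "qcom/sm8450/modem.mdt", dev);
	if (IS_ERR(metadata))
		return PTR_ERR(metadata);

	/* ... pass metadata/metadata_len to the PAS init step, then kfree(metadata) ... */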
Signed-off-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220128025513.97188-4-bjorn.andersson@linaro.org --- include/linux/soc/qcom/mdt_loader.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index afd47217996b..46bdb7bace9a 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -23,7 +23,8 @@ int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); -void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len); +void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len, + const char *fw_name, struct device *dev); #else /* !IS_ENABLED(CONFIG_QCOM_MDT_LOADER) */ @@ -51,7 +52,8 @@ static inline int qcom_mdt_load_no_init(struct device *dev, } static inline void *qcom_mdt_read_metadata(const struct firmware *fw, - size_t *data_len) + size_t *data_len, const char *fw_name, + struct device *dev) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 87563a043cef044fed5db7967a75741cc16ad2b1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 3 Feb 2022 23:08:11 +0800 Subject: ax25: fix reference count leaks of ax25_dev The previous commit d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") introduces refcount into ax25_dev, but there are reference leak paths in ax25_ctl_ioctl(), ax25_fwd_ioctl(), ax25_rt_add(), ax25_rt_del() and ax25_rt_opt(). This patch uses ax25_dev_put() and adjusts the position of ax25_addr_ax25dev() to fix reference count leaks of ax25_dev. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220203150811.42256-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 50b417df6221..8221af1811df 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -294,10 +294,12 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -#define ax25_dev_hold(__ax25_dev) \ - refcount_inc(&((__ax25_dev)->refcount)) +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} -static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +static inline void ax25_dev_put(ax25_dev *ax25_dev) { if (refcount_dec_and_test(&ax25_dev->refcount)) { kfree(ax25_dev); -- cgit v1.2.3 From b794eecb2af77145d36b9c28f056e242add9c4b2 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 23 Nov 2021 10:25:36 -0800 Subject: ice: add support for DSCP QoS for IDC The ice driver provides QoS information to auxiliary drivers through the exported function ice_get_qos_params. This function doesn't currently support L3 DSCP QoS. Add the necessary defines, structure elements and code to support DSCP QoS through the IIDC functions. 
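As a hedged sketch (not part of the patch), an auxiliary driver consuming the new fields might do something like the following; ice_get_qos_params() is the exported getter named above, while the exact (pf, &qos) calling convention and the configure_dscp_tc() helper are assumptions for illustration only.

	struct iidc_qos_params qos;
	int i;

	ice_get_qos_params(pf, &qos);
	if (qos.pfc_mode == IIDC_DSCP_PFC_MODE) {
		/* L3 DSCP mode: dscp_map[] carries the DSCP-to-TC mapping */
		for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++)
			configure_dscp_tc(i, qos.dscp_map[i]);
	}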
Signed-off-by: Dave Ertman Signed-off-by: Shiraz Saleem Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1289593411d3..1c1332e4df26 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -32,6 +32,8 @@ enum iidc_rdma_protocol { }; #define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_MAX_DSCP_MAPPING 64 +#define IIDC_DSCP_PFC_MODE 0x1 /* Struct to hold per RDMA Qset info */ struct iidc_rdma_qset_params { @@ -60,6 +62,8 @@ struct iidc_qos_params { u8 vport_relative_bw; u8 vport_priority_type; u8 num_tc; + u8 pfc_mode; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; }; struct iidc_event { -- cgit v1.2.3 From f4e526ff7e38e27bb87d53131d227a6fd6f73ab5 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 27 Jan 2022 18:55:08 -0800 Subject: soc: qcom: mdt_loader: Extract PAS operations Rather than passing a boolean to indicate if the PAS operations should be performed from within __mdt_load(), extract them to their own helper function. This will allow clients to invoke this directly, with some qcom_scm_pas_metadata context that they later need to release, without further having to complicate the prototype of qcom_mdt_load(). Signed-off-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220128025513.97188-9-bjorn.andersson@linaro.org --- include/linux/soc/qcom/mdt_loader.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index 46bdb7bace9a..9e8e60421192 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -10,10 +10,14 @@ struct device; struct firmware; +struct qcom_scm_pas_metadata; #if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) ssize_t qcom_mdt_get_size(const struct firmware *fw); +int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, phys_addr_t mem_phys, + struct qcom_scm_pas_metadata *pas_metadata_ctx); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, size_t mem_size, @@ -33,6 +37,13 @@ static inline ssize_t qcom_mdt_get_size(const struct firmware *fw) { return -ENODEV; } +static inline int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, phys_addr_t mem_phys, + struct qcom_scm_pas_metadata *pas_metadata_ctx) +{ + return -ENODEV; +} + static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, -- cgit v1.2.3 From 52beb1fc237d67cdc64277dc90047767a6fc52d7 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 1 Dec 2021 14:05:04 +0100 Subject: firmware: qcom: scm: Drop cpumask parameter from set_boot_addr() qcom_scm_set_cold/warm_boot_addr() currently take a cpumask parameter, but it's not very useful because at the end we always set the same entry address for all CPUs. This also allows speeding up probe of cpuidle-qcom-spm a bit because only one SCM call needs to be made to the TrustZone firmware, instead of one per CPU. The main reason for this change is that it allows implementing the "multi-cluster" variant of the set_boot_addr() call more easily without having to rely on functions that break in certain build configurations or that are not exported to modules. 
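Purely as an illustration of the new calling convention (the cpuidle-qcom-spm changes themselves are not part of this include-only diff), a probe path can now make a single firmware call instead of iterating over a cpumask; the cpu_resume_arm entry point used here is an assumption.

	/* one call now covers all CPUs since the cpumask parameter is gone */
	ret = qcom_scm_set_warm_boot_addr(cpu_resume_arm);
	if (ret)
		return ret;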
Signed-off-by: Stephan Gerhold Acked-by: Daniel Lezcano Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211201130505.257379-4-stephan@gerhold.net --- include/linux/qcom_scm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 681748619890..f8335644a01a 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -63,8 +63,8 @@ enum qcom_scm_ice_cipher { extern bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); -extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); +extern int qcom_scm_set_cold_boot_addr(void *entry); +extern int qcom_scm_set_warm_boot_addr(void *entry); extern void qcom_scm_cpu_power_down(u32 flags); extern int qcom_scm_set_remote_state(u32 state, u32 id); -- cgit v1.2.3 From 2651bf680bc2ad9a078b7222b0873145ab4ece07 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Feb 2022 11:28:25 -0800 Subject: block: introduce BLK_STS_OFFLINE Currently, drivers report BLK_STS_IOERR for devices that are not fully online or are being removed. This behavior could cause confusion for users, as they are not really I/O errors from the device. Solve this issue with a new state BLK_STS_OFFLINE, which reports "device offline error" in dmesg instead of "I/O error". EIO is intentionally kept to not change the user-visible return value. Signed-off-by: Song Liu Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220203192827.1370270-2-song@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff..5561e58d158a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) +/* + * BLK_STS_OFFLINE is returned from the driver when the target device is offline + * or is being taken offline. This could help differentiate the case where a + * device is intentionally being shut down from a real I/O error. + */ +#define BLK_STS_OFFLINE ((__force blk_status_t)17) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- cgit v1.2.3 From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: netfilter: ctnetlink: disable helper autoassign When userspace, e.g. conntrackd, inserts an entry with a specified helper, it's possible that the helper is lost immediately after it's added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set the IPS_HELPER bit to bypass the auto-assign logic, as it's unwanted here, just like when a helper is assigned via the ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. 
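A minimal sketch of the idea (the actual change lives in net/netfilter/nf_conntrack_netlink.c and is outside this include-only diff): when userspace attaches a helper explicitly, flag the conntrack entry so the auto-assign path leaves it alone.

	if (helper) {
		/* explicit helper from ctnetlink: pin it and mark the entry */
		RCU_INIT_POINTER(help->helper, helper);
		__set_bit(IPS_HELPER_BIT, &ct->status);
	}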
Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15..26071021e986 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), -- cgit v1.2.3 From 8b5413647262dda8d8d0e07e14ea1de9ac7cf0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 17 Jan 2022 21:56:13 +0100 Subject: netfilter: nfqueue: enable to get skb->priority This info could be useful to improve traffic analysis. Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index aed90c4df0c8..ef7c97f21a15 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -61,6 +61,7 @@ enum nfqnl_attr_type { NFQA_SECCTX, /* security context string */ NFQA_VLAN, /* nested attribute: packet vlan info */ NFQA_L2HDR, /* full L2 header */ + NFQA_PRIORITY, /* skb->priority */ __NFQA_MAX }; -- cgit v1.2.3 From bb62a765b1b5597d32a426096aa78d2a8eb6b091 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2022 13:06:59 +0100 Subject: netfilter: conntrack: make all extensions 8-byte alignned All extensions except one need 8 byte alignment, so just make that the default. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index c7515d82ab06..705a4487f023 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -49,7 +49,7 @@ enum nf_ct_ext_id { struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - char data[]; + char data[] __aligned(8); }; static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) @@ -83,10 +83,7 @@ struct nf_ct_ext_type { void (*destroy)(struct nf_conn *ct); enum nf_ct_ext_id id; - - /* Length and min alignment. */ u8 len; - u8 align; }; int nf_ct_extend_register(const struct nf_ct_ext_type *type); -- cgit v1.2.3 From 5f31edc0676b55cd6baf5ba119d1881f3fbadee1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2022 13:07:00 +0100 Subject: netfilter: conntrack: move extension sizes into core No need to specify this in the registration modules, we already collect all sizes for build-time checks on the maximum combined size. After this change, all extensions except nat have no meaningful content in their nf_ct_ext_type struct definition. Next patch handles nat, this will then allow to remove the dynamic register api completely. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 705a4487f023..87d818414092 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -83,7 +83,6 @@ struct nf_ct_ext_type { void (*destroy)(struct nf_conn *ct); enum nf_ct_ext_id id; - u8 len; }; int nf_ct_extend_register(const struct nf_ct_ext_type *type); -- cgit v1.2.3 From 1bc91a5ddf3eaea0e0ea957cccf3abdcfcace00e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2022 13:07:01 +0100 Subject: netfilter: conntrack: handle ->destroy hook via nat_ops instead The nat module already exposes a few functions to the conntrack core. Move the nat extension destroy hook to it. After this, no conntrack extension needs a destroy hook. 'struct nf_ct_ext_type' and the register/unregister api can be removed in a followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 1 + include/net/netfilter/nf_conntrack_extend.h | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 15e71bfff726..c2c6f332fb90 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -379,6 +379,7 @@ struct nf_nat_hook { unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, enum nf_nat_manip_type mtype, enum ip_conntrack_dir dir); + void (*remove_nat_bysrc)(struct nf_conn *ct); }; extern const struct nf_nat_hook __rcu *nf_nat_hook; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 87d818414092..343f9194423a 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -79,9 +79,6 @@ void nf_ct_ext_destroy(struct nf_conn *ct); void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); struct nf_ct_ext_type { - /* Destroys relationships (can be NULL). */ - void (*destroy)(struct nf_conn *ct); - enum nf_ct_ext_id id; }; -- cgit v1.2.3 From 1015c3de23eedb8ac5a163165434484df44cfe00 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2022 13:07:02 +0100 Subject: netfilter: conntrack: remove extension register api These no longer register/unregister a meaningful structure so remove it. 
Cc: Paul Blakey Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_acct.h | 1 - include/net/netfilter/nf_conntrack_ecache.h | 13 ------------- include/net/netfilter/nf_conntrack_extend.h | 9 --------- include/net/netfilter/nf_conntrack_labels.h | 3 --- include/net/netfilter/nf_conntrack_seqadj.h | 3 --- include/net/netfilter/nf_conntrack_timeout.h | 12 ------------ include/net/netfilter/nf_conntrack_timestamp.h | 13 ------------- 7 files changed, 54 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 7f44a771530e..4b2b7f8914ea 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -78,7 +78,6 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, void nf_conntrack_acct_pernet_init(struct net *net); -int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index d932e22edcb4..16bcff809b18 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -166,9 +166,6 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); -int nf_conntrack_ecache_init(void); -void nf_conntrack_ecache_fini(void); - static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return net->ct.ecache_dwork_pending; @@ -194,16 +191,6 @@ static inline void nf_conntrack_ecache_pernet_init(struct net *net) static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { } - -static inline int nf_conntrack_ecache_init(void) -{ - return 0; -} - -static inline void nf_conntrack_ecache_fini(void) -{ -} - static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 343f9194423a..96635ad2acc7 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -72,16 +72,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) #define nf_ct_ext_find(ext, id) \ ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) -/* Destroy all relationships */ -void nf_ct_ext_destroy(struct nf_conn *ct); - /* Add this type, returns pointer to data or NULL. 
*/ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); -struct nf_ct_ext_type { - enum nf_ct_ext_id id; -}; - -int nf_ct_extend_register(const struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index ba916411c4e1..3c23298e68ca 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -45,12 +45,9 @@ int nf_connlabels_replace(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); -void nf_conntrack_labels_fini(void); int nf_connlabels_get(struct net *net, unsigned int bit); void nf_connlabels_put(struct net *net); #else -static inline int nf_conntrack_labels_init(void) { return 0; } -static inline void nf_conntrack_labels_fini(void) {} static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h index 0a10b50537ae..883c414b768e 100644 --- a/include/net/netfilter/nf_conntrack_seqadj.h +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -42,7 +42,4 @@ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff); s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq); -int nf_conntrack_seqadj_init(void); -void nf_conntrack_seqadj_fini(void); - #endif /* _NF_CONNTRACK_SEQADJ_H */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 659b0ea25b4d..db507e4a65bb 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -89,23 +89,11 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -int nf_conntrack_timeout_init(void); -void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name); void nf_ct_destroy_timeout(struct nf_conn *ct); #else -static inline int nf_conntrack_timeout_init(void) -{ - return 0; -} - -static inline void nf_conntrack_timeout_fini(void) -{ - return; -} - static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 820ea34b6029..57138d974a9f 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -40,21 +40,8 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP void nf_conntrack_tstamp_pernet_init(struct net *net); - -int nf_conntrack_tstamp_init(void); -void nf_conntrack_tstamp_fini(void); #else static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} - -static inline int nf_conntrack_tstamp_init(void) -{ - return 0; -} - -static inline void nf_conntrack_tstamp_fini(void) -{ - return; -} #endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */ #endif /* _NF_CONNTRACK_TSTAMP_H */ -- cgit v1.2.3 From 20ff32024624102596f2b4083a17a97ca71d6cd8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2022 16:09:13 
+0100 Subject: netfilter: conntrack: pptp: use single option structure Instead of exposing the four hooks individually, use a single hook ops structure. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_pptp.h | 38 +++++++++++++---------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index a28aa289afdc..c3bdb4370938 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -300,26 +300,22 @@ union pptp_ctrl_union { struct PptpSetLinkInfo setlink; }; -extern int -(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); -extern int -(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); -extern void -(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *exp_orig, - struct nf_conntrack_expect *exp_reply); -extern void -(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct, - struct nf_conntrack_expect *exp); +struct nf_nat_pptp_hook { + int (*outbound)(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + int (*inbound)(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + void (*exp_gre)(struct nf_conntrack_expect *exp_orig, + struct nf_conntrack_expect *exp_reply); + void (*expectfn)(struct nf_conn *ct, + struct nf_conntrack_expect *exp); +}; +extern const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook; #endif /* _NF_CONNTRACK_PPTP_H */ -- cgit v1.2.3 From ac9f0c810684a1b161c18eb4b91ce84cbc13c91d Mon Sep 17 00:00:00 2001 From: Anton Lundin Date: Thu, 3 Feb 2022 10:41:35 +0100 Subject: ata: libata-core: Introduce ATA_HORKAGE_NO_LOG_DIR horkage 06f6c4c6c3e8 ("ata: libata: add missing ata_identify_page_supported() calls") introduced additional calls to ata_identify_page_supported(), thus also indirectly adding accesses to the device log directory log page through ata_log_supported(). Reading this log page causes SATADOM-ML 3ME devices to lock up. Introduce the horkage flag ATA_HORKAGE_NO_LOG_DIR to prevent accesses to the log directory in ata_log_supported() and add a blacklist entry with this flag for "SATADOM-ML 3ME" devices. 
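For context, a hedged sketch of the driver-side use of the new flag (the libata-core.c hunks are not part of this include-only diff): ata_log_supported() bails out early for flagged devices, and the affected model gets a blacklist entry.

	/* in ata_log_supported(): */
	if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
		return false;

	/* in ata_device_blacklist[]: */
	{ "SATADOM-ML 3ME", NULL, ATA_HORKAGE_NO_LOG_DIR },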
Fixes: 636f6e2af4fb ("libata: add horkage for missing Identify Device log") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Anton Lundin Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 605756f645be..7f99b4d78822 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -380,6 +380,7 @@ enum { ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From b93235e68921b9acd38ee309953a3a9808105289 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Feb 2022 14:20:31 -0800 Subject: tls: cap the output scatter list to something reasonable TLS recvmsg() passes user pages as destination for decrypt. The decrypt operation is repeated record by record, each record being 16kB, max. TLS allocates an sg_table and uses iov_iter_get_pages() to populate it with enough pages to fit the decrypted record. Even though we decrypt a single message at a time we size the sg_table based on the entire length of the iovec. This leads to unnecessarily large allocations, risking triggering OOM conditions. Use iov_iter_truncate() / iov_iter_reexpand() to construct a "capped" version of iov_iter_npages(). Alternatively we could parametrize iov_iter_npages() to take the size as arg instead of using i->count, or do something else.. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/uio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 1198a2bfc9bf..739285fe5a2f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -273,6 +273,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) i->count = count; } +static inline int +iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) +{ + size_t shorted = 0; + int npages; + + if (iov_iter_count(i) > max_bytes) { + shorted = iov_iter_count(i) - max_bytes; + iov_iter_truncate(i, max_bytes); + } + npages = iov_iter_npages(i, INT_MAX); + if (shorted) + iov_iter_reexpand(i, iov_iter_count(i) + shorted); + + return npages; +} + struct csum_state { __wsum csum; size_t off; -- cgit v1.2.3 From 56b4b5abcdab6daf71c5536fca2772f178590e06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:06 +0100 Subject: block: clone crypto and integrity data in __bio_clone_fast __bio_clone_fast should also clone integrity and crypto data, as a clone without those is incomplete. Right now the only caller that can actually support crypto and integrity data (dm) does it manually for the one callchain that supports these, but we better do it properly in the core. Note that all callers except for the above mentioned one also don't need to handle failure at all, given that the integrity and crypto clones are based on mempool allocations that won't fail for sleeping allocations. 
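A minimal caller-side sketch of the new convention may help here; the helper name example_clone_bio() and the GFP_NOIO context are illustrative assumptions, not taken from the patch, and the legacy call sequence in the comment is paraphrased rather than quoted. The point is that __bio_clone_fast() now returns an error code because the core performs the integrity and crypto clones itself.

#include <linux/bio.h>

/*
 * Before this change, a stacking driver that cared about integrity/crypto
 * data had to clone it by hand after __bio_clone_fast(), roughly:
 *
 *     __bio_clone_fast(clone, bio_src);
 *     bio_crypt_clone(clone, bio_src, GFP_NOIO);
 *     bio_integrity_clone(clone, bio_src, GFP_NOIO);
 */
static int example_clone_bio(struct bio *clone, struct bio *bio_src)
{
        /*
         * After: one call, and the core clones integrity + crypto data.
         * With a sleeping allocation this is not expected to fail, but
         * the return value still has to be checked.
         */
        return __bio_clone_fast(clone, bio_src, GFP_NOIO);
}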
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 18cfe5bb41ea..b814361c957b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,7 +413,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -extern void __bio_clone_fast(struct bio *, struct bio *); +int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -- cgit v1.2.3 From abfc426d1b2fb2176df59851a64223b58ddae7e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:09 +0100 Subject: block: pass a block_device to bio_clone_fast Pass a block_device to bio_clone_fast and __bio_clone_fast and give the functions more suitable names. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index b814361c957b..7523aba4ddf7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,8 +413,10 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); -extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); +struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, + gfp_t gfp, struct bio_set *bs); +int bio_init_clone(struct block_device *bdev, struct bio *bio, + struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; -- cgit v1.2.3 From 916acbf6b4b9262df7de1d2b6208a4efa209a88f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Feb 2022 16:45:21 +0200 Subject: serial: core: Fix the definition name in the comment of UPF_* flags From day 1 the UPF_LAST_USER wasn't defined, a specific number of the last bit for userspace. Instead the code always relies on ASYNCB_LAST_USER. Fix comment accordingly. Fixes: 904326ecac02 ("tty,serial: Unify UPF_* and ASYNC_* flag definitions") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220203144521.16457-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index c58cc142d23f..31f7fe527395 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -171,7 +171,7 @@ struct uart_port { * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * - * Bits [0..UPF_LAST_USER] are userspace defined/visible/changeable + * Bits [0..ASYNCB_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. 
*/ -- cgit v1.2.3 From 84564481bc4520c47e7fe9c594c0523d81e6a97a Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Fri, 7 Jan 2022 08:44:25 +0100 Subject: mux: Add support for reading mux state from consumer DT node In some cases, we might need to provide the state of the mux to be set for the operation of a given peripheral. Therefore, pass this information using mux-states property. Link: https://lore.kernel.org/lkml/20211123081222.27979-1-a-govindraju@ti.com Signed-off-by: Aswath Govindraju Signed-off-by: Peter Rosin (minor edits) Link: https://lore.kernel.org/r/aac25be8-9515-a980-f7cb-709938c84822@axentia.se Signed-off-by: Greg Kroah-Hartman --- include/linux/mux/consumer.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h index 7a09b040ac39..2e25c838f831 100644 --- a/include/linux/mux/consumer.h +++ b/include/linux/mux/consumer.h @@ -14,14 +14,19 @@ struct device; struct mux_control; +struct mux_state; unsigned int mux_control_states(struct mux_control *mux); int __must_check mux_control_select_delay(struct mux_control *mux, unsigned int state, unsigned int delay_us); +int __must_check mux_state_select_delay(struct mux_state *mstate, + unsigned int delay_us); int __must_check mux_control_try_select_delay(struct mux_control *mux, unsigned int state, unsigned int delay_us); +int __must_check mux_state_try_select_delay(struct mux_state *mstate, + unsigned int delay_us); static inline int __must_check mux_control_select(struct mux_control *mux, unsigned int state) @@ -29,18 +34,31 @@ static inline int __must_check mux_control_select(struct mux_control *mux, return mux_control_select_delay(mux, state, 0); } +static inline int __must_check mux_state_select(struct mux_state *mstate) +{ + return mux_state_select_delay(mstate, 0); +} + static inline int __must_check mux_control_try_select(struct mux_control *mux, unsigned int state) { return mux_control_try_select_delay(mux, state, 0); } +static inline int __must_check mux_state_try_select(struct mux_state *mstate) +{ + return mux_state_try_select_delay(mstate, 0); +} + int mux_control_deselect(struct mux_control *mux); +int mux_state_deselect(struct mux_state *mstate); struct mux_control *mux_control_get(struct device *dev, const char *mux_name); void mux_control_put(struct mux_control *mux); struct mux_control *devm_mux_control_get(struct device *dev, const char *mux_name); +struct mux_state *devm_mux_state_get(struct device *dev, + const char *mux_name); #endif /* _LINUX_MUX_CONSUMER_H */ -- cgit v1.2.3 From bb6e8c28414335a551da5973d44cc537f7abe65a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 12 Jan 2022 08:00:53 -0800 Subject: firmware_loader: simplfy builtin or module check The existing check is outdated and confuses developers. Use the already existing IS_REACHABLE() defined on kconfig.h which makes the intention much clearer. 
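As a hedged illustration of why IS_REACHABLE() reads better (the helper and the firmware name below are made up; check include/linux/kconfig.h for the authoritative macro definition): IS_REACHABLE(CONFIG_FW_LOADER) is true when the loader is built in, or when it is a module and the calling code is itself modular, which is exactly the condition the old open-coded #if spelled out.

#include <linux/firmware.h>
#include <linux/kconfig.h>

/*
 * Roughly, IS_REACHABLE(CONFIG_FW_LOADER) ~=
 *     IS_BUILTIN(CONFIG_FW_LOADER) ||
 *     (IS_MODULE(CONFIG_FW_LOADER) && defined(MODULE))
 * i.e. "can code compiled here actually link against request_firmware()?"
 */
static int example_load_blob(struct device *dev, const struct firmware **fw)
{
        /* "example-blob.bin" is a hypothetical firmware name. */
        return request_firmware(fw, "example-blob.bin", dev);
}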
Cc: Randy Dunlap Cc: Masahiro Yamada Reported-by: Borislav Petkov Reported-by: Greg Kroah-Hartman Suggested-by: Masahiro Yamada Signed-off-by: Luis Chamberlain Ackd-by: Randy Dunlap Link: https://lore.kernel.org/r/20220112160053.723795-1-mcgrof@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 3b057dfc8284..ec2ccfebef65 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -34,7 +34,7 @@ static inline bool firmware_request_builtin(struct firmware *fw, } #endif -#if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) +#if IS_REACHABLE(CONFIG_FW_LOADER) int request_firmware(const struct firmware **fw, const char *name, struct device *device); int firmware_request_nowarn(const struct firmware **fw, const char *name, -- cgit v1.2.3 From bed89478934a9803d1a4d97574dcc30e509aa972 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Feb 2022 10:49:38 +0200 Subject: ieee80211: fix -Wcast-qual warnings When enabling -Wcast-qual e.g. via W=3, we get a lot of warnings from this file, whenever it's included. Since the fixes are simple, just do that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20220202104617.79ec4a4bab29.I8177a0c79d656c552e22c88931d8da06f2977896@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 559b6c644938..60ee7b3f58e7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2427,7 +2427,7 @@ struct ieee80211_tx_pwr_env { static inline u8 ieee80211_he_oper_size(const u8 *he_oper_ie) { - struct ieee80211_he_operation *he_oper = (void *)he_oper_ie; + const struct ieee80211_he_operation *he_oper = (const void *)he_oper_ie; u8 oper_len = sizeof(struct ieee80211_he_operation); u32 he_oper_params; @@ -2460,7 +2460,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (void *)&he_oper->optional; + const u8 *ret = (const void *)&he_oper->optional; u32 he_oper_params; if (!he_oper) @@ -2475,7 +2475,7 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) ret++; - return (void *)ret; + return (const void *)ret; } /* HE Spatial Reuse defines */ @@ -2496,7 +2496,7 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) static inline u8 ieee80211_he_spr_size(const u8 *he_spr_ie) { - struct ieee80211_he_spr *he_spr = (void *)he_spr_ie; + const struct ieee80211_he_spr *he_spr = (const void *)he_spr_ie; u8 spr_len = sizeof(struct ieee80211_he_spr); u8 he_spr_params; -- cgit v1.2.3 From 7e367b06f16b3d22e884af55117601d2a73fca03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Feb 2022 10:49:39 +0200 Subject: cfg80211: fix -Wcast-qual warnings When enabling -Wcast-qual e.g. via W=3, we get a lot of warnings from this file, whenever it's included. Since the fixes are simple, just do that. 
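The generic pattern behind these fixes is worth spelling out once; the helper below is a made-up example, not code from the patch. With -Wcast-qual, casting a const source pointer through plain (void *) is reported as discarding the qualifier, while casting through (const void *) keeps the compiler quiet and the code honest.

#include <linux/types.h>

struct example_hdr {
        u8 id;
        u8 len;
};

/* Hypothetical parser: 'ie' stays const all the way through. */
static inline const struct example_hdr *example_parse(const u8 *ie)
{
        /* old style: (void *)ie -- drops const, warns under -Wcast-qual */
        return (const void *)ie;
}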
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20220202104617.6a1d52213019.I92d82e7251cf712faa43fd09db3142327a3bce3d@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d19e48f9fdc6..f6db085ff3df 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2604,7 +2604,7 @@ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); */ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) { - return (void *)ieee80211_bss_get_elem(bss, id); + return (const void *)ieee80211_bss_get_elem(bss, id); } @@ -5970,9 +5970,9 @@ cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len, (!match_len && match_offset))) return NULL; - return (void *)cfg80211_find_elem_match(eid, ies, len, - match, match_len, - match_offset ? + return (const void *)cfg80211_find_elem_match(eid, ies, len, + match, match_len, + match_offset ? match_offset - 2 : 0); } @@ -6099,7 +6099,7 @@ static inline const u8 * cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, unsigned int len) { - return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); + return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } /** -- cgit v1.2.3 From 5beb53d6ba4f39743d057e756db22bd8c079fc21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Feb 2022 10:49:40 +0200 Subject: ieee80211: radiotap: fix -Wcast-qual warnings When enabling -Wcast-qual e.g. via W=3, we get a lot of warnings from this file, whenever it's included. Since the fixes are simple, just do that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20220202104617.cc733aeb1a18.I03396e1bf7a1af364cbd0916037f65d800035039@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 11630351c978..598f53d2a3a0 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019 Intel Corporation + * Copyright (c) 2018-2019, 2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -365,7 +365,7 @@ enum ieee80211_radiotap_zero_len_psdu_type { */ static inline u16 ieee80211_get_radiotap_len(const char *data) { - struct ieee80211_radiotap_header *hdr = (void *)data; + const struct ieee80211_radiotap_header *hdr = (const void *)data; return get_unaligned_le16(&hdr->it_len); } -- cgit v1.2.3 From ea5907db2a9ccf37fdb6d1e67bcb620c1fea10f8 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 2 Feb 2022 10:49:47 +0200 Subject: mac80211: fix struct ieee80211_tx_info size The size of the status_driver_data field was not adjusted when the is_valid_ack_signal field was added. Since the size of struct ieee80211_tx_info is limited, replace the is_valid_ack_signal field with a flags field, and adjust the struct size accordingly. 
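A driver-side sketch of the resulting conversion; the function and variable names are invented for illustration, while the flag is the one introduced by this patch.

#include <net/mac80211.h>

/* Hypothetical status-reporting path in a driver. */
static void example_report_ack(struct ieee80211_tx_info *info,
                               int ack_signal, bool ack_valid)
{
        info->status.ack_signal = ack_signal;

        /* was: info->status.is_valid_ack_signal = ack_valid; */
        if (ack_valid)
                info->status.flags |= IEEE80211_TX_STATUS_ACK_SIGNAL_VALID;
}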
Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20220202104617.0ff363d4fa56.I45792c0187034a6d0e1c99a7db741996ef7caba3@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c50221d7e82c..bd6912d0292b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #ifndef MAC80211_H @@ -883,6 +883,17 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), }; +/** + * enum mac80211_tx_status_flags - flags to describe transmit status + * + * @IEEE80211_TX_STATUS_ACK_SIGNAL_VALID: ACK signal is valid + * + * These flags are used in tx_info->status.flags. + */ +enum mac80211_tx_status_flags { + IEEE80211_TX_STATUS_ACK_SIGNAL_VALID = BIT(0), +}; + /* * This definition is used as a mask to clear all temporary flags, which are * set by the tx handlers for each transmission attempt by the mac80211 stack. @@ -1046,7 +1057,7 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @status.antenna: (legacy, kept only for iwlegacy) * @status.tx_time: airtime consumed for transmission; note this is only * used for WMM AC, not for airtime fairness - * @status.is_valid_ack_signal: ACK signal is valid + * @status.flags: status flags, see &enum mac80211_tx_status_flags * @status.status_driver_data: driver use area * @ack: union part for pure ACK data * @ack.cookie: cookie for the ACK @@ -1099,8 +1110,8 @@ struct ieee80211_tx_info { u8 ampdu_len; u8 antenna; u16 tx_time; - bool is_valid_ack_signal; - void *status_driver_data[19 / sizeof(void *)]; + u8 flags; + void *status_driver_data[18 / sizeof(void *)]; } status; struct { struct ieee80211_tx_rate driver_rates[ -- cgit v1.2.3 From b9794a822281944ef3de5b1812a94cbdb8134320 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:33 +0100 Subject: powercap/drivers/dtpm: Convert the init table section to a simple array The init table section is freed after the system booted. However the next changes will make per module the DTPM description, so the table won't be accessible when the module is loaded. In order to fix that, we should move the table to the data section where there are very few entries and that makes strange to add it there. The main goal of the table was to keep self-encapsulated code and we can keep it almost as it by using an array instead. Suggested-by: Ulf Hansson Reviewed-by: Ulf Hansson Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220128163537.212248-2-daniel.lezcano@linaro.org --- include/asm-generic/vmlinux.lds.h | 11 ----------- include/linux/dtpm.h | 24 +++--------------------- 2 files changed, 3 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca69..2a10db2f0bc5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -321,16 +321,6 @@ #define THERMAL_TABLE(name) #endif -#ifdef CONFIG_DTPM -#define DTPM_TABLE() \ - . 
= ALIGN(8); \ - __dtpm_table = .; \ - KEEP(*(__dtpm_table)) \ - __dtpm_table_end = .; -#else -#define DTPM_TABLE() -#endif - #define KERNEL_DTB() \ STRUCT_ALIGN(); \ __dtb_start = .; \ @@ -723,7 +713,6 @@ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(timer) \ THERMAL_TABLE(governor) \ - DTPM_TABLE() \ EARLYCON_TABLE() \ LSM_TABLE() \ EARLY_LSM_TABLE() \ diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index d37e5d06a357..506048158a50 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,29 +32,11 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; -typedef int (*dtpm_init_t)(void); - -struct dtpm_descr { - dtpm_init_t init; +struct dtpm_subsys_ops { + const char *name; + int (*init)(void); }; -/* Init section thermal table */ -extern struct dtpm_descr __dtpm_table[]; -extern struct dtpm_descr __dtpm_table_end[]; - -#define DTPM_TABLE_ENTRY(name, __init) \ - static struct dtpm_descr __dtpm_table_entry_##name \ - __used __section("__dtpm_table") = { \ - .init = __init, \ - } - -#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) - -#define for_each_dtpm_table(__dtpm) \ - for (__dtpm = __dtpm_table; \ - __dtpm < __dtpm_table_end; \ - __dtpm++) - static inline struct dtpm *to_dtpm(struct powercap_zone *zone) { return container_of(zone, struct dtpm, zone); -- cgit v1.2.3 From 3759ec678e8944dc2ea70cab77a300408f78ae27 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:34 +0100 Subject: powercap/drivers/dtpm: Add hierarchy creation The DTPM framework is available but without a way to configure it. This change provides a way to create a hierarchy of DTPM node where the power consumption reflects the sum of the children's power consumption. It is up to the platform to specify an array of dtpm nodes where each element has a pointer to its parent, except the top most one. The type of the node gives the indication of which initialization callback to call. At this time, we can create a virtual node, where its purpose is to be a parent in the hierarchy, and a DT node where the name describes its path. In order to ensure a nice self-encapsulation, the DTPM subsys array contains a couple of initialization functions, one to setup the DTPM backend and one to initialize it up. With this approach, the DTPM framework has a very few material to export. 
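A hedged platform-side sketch of the hierarchy described above; the SoC name, device-tree paths, compatible string and the wiring of the node table through the of_device_id .data pointer are all illustrative assumptions, not taken from the patch. A virtual root aggregates two DT-backed children, and dtpm_create_hierarchy() is handed the match table at init time.

#include <linux/dtpm.h>
#include <linux/init.h>
#include <linux/mod_devicetable.h>

static struct dtpm_node example_hierarchy[] = {
        [0] = { .name = "example-soc", .type = DTPM_NODE_VIRTUAL },
        [1] = { .name = "/cpus/cpu@0", .type = DTPM_NODE_DT,
                .parent = &example_hierarchy[0] },
        [2] = { .name = "/cpus/cpu@1", .type = DTPM_NODE_DT,
                .parent = &example_hierarchy[0] },
        { /* sentinel */ },
};

static struct of_device_id example_dtpm_match[] = {
        { .compatible = "vendor,example-soc", .data = example_hierarchy },
        {},
};

static int __init example_dtpm_init(void)
{
        return dtpm_create_hierarchy(example_dtpm_match);
}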
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220128163537.212248-3-daniel.lezcano@linaro.org --- include/linux/dtpm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index 506048158a50..f7a25c70dd4c 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,9 +32,23 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; +struct device_node; + struct dtpm_subsys_ops { const char *name; int (*init)(void); + int (*setup)(struct dtpm *, struct device_node *); +}; + +enum DTPM_NODE_TYPE { + DTPM_NODE_VIRTUAL = 0, + DTPM_NODE_DT, +}; + +struct dtpm_node { + enum DTPM_NODE_TYPE type; + const char *name; + struct dtpm_node *parent; }; static inline struct dtpm *to_dtpm(struct powercap_zone *zone) @@ -52,4 +66,5 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); #endif -- cgit v1.2.3 From 80110bbfbba6f0078d5a1cbc8df004506db8ffe5 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 3 Feb 2022 20:49:24 -0800 Subject: mm/page_table_check: check entries at pmd levels syzbot detected a case where the page table counters were not properly updated. syzkaller login: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:162! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3099 Comm: pasha Not tainted 5.16.0+ #48 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO4 RIP: 0010:__page_table_check_zero+0x159/0x1a0 Call Trace: free_pcp_prepare+0x3be/0xaa0 free_unref_page+0x1c/0x650 free_compound_page+0xec/0x130 free_transhuge_page+0x1be/0x260 __put_compound_page+0x90/0xd0 release_pages+0x54c/0x1060 __pagevec_release+0x7c/0x110 shmem_undo_range+0x85e/0x1250 ... The repro involved having a huge page that is split due to uprobe event temporarily replacing one of the pages in the huge page. Later the huge page was combined again, but the counters were off, as the PTE level was not properly updated. Make sure that when PMD is cleared and prior to freeing the level the PTEs are updated. Link: https://lkml.kernel.org/r/20220131203249.2832273-5-pasha.tatashin@soleen.com Fixes: df4e817b7108 ("mm: page table check") Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Dave Hansen Cc: Greg Thelen Cc: H. 
Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jiri Slaby Cc: Mike Rapoport Cc: Muchun Song Cc: Paul Turner Cc: Wei Xu Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_table_check.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 38cace1da7b6..01e16c7696ec 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { @@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, __page_table_check_pud_set(mm, addr, pudp, pud); } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear_range(mm, addr, pmd); +} + #else static inline void page_table_check_alloc(struct page *page, unsigned int order) @@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, { } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ +} + #endif /* CONFIG_PAGE_TABLE_CHECK */ #endif /* __LINUX_PAGE_TABLE_CHECK_H */ -- cgit v1.2.3 From 314c459a6fe0957b5885fbc65c53d51444092880 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 3 Feb 2022 20:49:29 -0800 Subject: mm/pgtable: define pte_index so that preprocessor could recognize it Since commit 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") pte_index is a static inline and there is no define for it that can be recognized by the preprocessor. As a result, vm_insert_pages() uses slower loop over vm_insert_page() instead of insert_pages() that amortizes the cost of spinlock operations when inserting multiple pages. Link: https://lkml.kernel.org/r/20220111145457.20748-1-rppt@kernel.org Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") Signed-off-by: Mike Rapoport Reported-by: Christian Dietrich Reviewed-by: Khalid Aziz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index bc8713a76e03..f4f4077b97aa 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) -- cgit v1.2.3 From d2a02e3c8bb6b347818518edff5a4b40ff52d6d8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 19 Jan 2022 14:35:06 +0100 Subject: lib/crypto: blake2s: avoid indirect calls to compression function for Clang CFI blake2s_compress_generic is weakly aliased by blake2s_compress. The current harness for function selection uses a function pointer, which is ordinarily inlined and resolved at compile time. 
But when Clang's CFI is enabled, CFI still triggers when making an indirect call via a weak symbol. This seems like a bug in Clang's CFI, as though it's bucketing weak symbols and strong symbols differently. It also only seems to trigger when "full LTO" mode is used, rather than "thin LTO". [ 0.000000][ T0] Kernel panic - not syncing: CFI failure (target: blake2s_compress_generic+0x0/0x1444) [ 0.000000][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-mainline-06981-g076c855b846e #1 [ 0.000000][ T0] Hardware name: MT6873 (DT) [ 0.000000][ T0] Call trace: [ 0.000000][ T0] dump_backtrace+0xfc/0x1dc [ 0.000000][ T0] dump_stack_lvl+0xa8/0x11c [ 0.000000][ T0] panic+0x194/0x464 [ 0.000000][ T0] __cfi_check_fail+0x54/0x58 [ 0.000000][ T0] __cfi_slowpath_diag+0x354/0x4b0 [ 0.000000][ T0] blake2s_update+0x14c/0x178 [ 0.000000][ T0] _extract_entropy+0xf4/0x29c [ 0.000000][ T0] crng_initialize_primary+0x24/0x94 [ 0.000000][ T0] rand_initialize+0x2c/0x6c [ 0.000000][ T0] start_kernel+0x2f8/0x65c [ 0.000000][ T0] __primary_switched+0xc4/0x7be4 [ 0.000000][ T0] Rebooting in 5 seconds.. Nonetheless, the function pointer method isn't so terrific anyway, so this patch replaces it with a simple boolean, which also gets inlined away. This successfully works around the Clang bug. In general, I'm not too keen on all of the indirection involved here; it clearly does more harm than good. Hopefully the whole thing can get cleaned up down the road when lib/crypto is overhauled more comprehensively. But for now, we go with a simple bandaid. Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in") Link: https://github.com/ClangBuiltLinux/linux/issues/1567 Reported-by: Miles Chen Tested-by: Miles Chen Tested-by: Nathan Chancellor Tested-by: John Stultz Acked-by: Nick Desaulniers Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- include/crypto/internal/blake2s.h | 40 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index d39cfa0d333e..52363eee2b20 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } -typedef void (*blake2s_compress_t)(struct blake2s_state *state, - const u8 *block, size_t nblocks, u32 inc); - /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static inline void __blake2s_update(struct blake2s_state *state, - const u8 *in, size_t inlen, - blake2s_compress_t compress) +static __always_inline void +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, + bool force_generic) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); } @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, state->buflen += inlen; } -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, - blake2s_compress_t compress) +static __always_inline void +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, state->buflen); + else + blake2s_compress(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, compress); + __blake2s_update(state, in, inlen, force_generic); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, compress); + __blake2s_final(state, out, force_generic); return 0; } -- cgit v1.2.3 From ae26508651272695a3ab353f75ab9a8daf3da324 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 23 Jan 2022 20:45:06 +0800 Subject: cpufreq: Move to_gov_attr_set() to 
cpufreq.h So it can be reused by other codes. Signed-off-by: Kevin Hao Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078..f0dfc0b260ec 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -658,6 +658,11 @@ struct gov_attr_set { /* sysfs ops for cpufreq governors */ extern const struct sysfs_ops governor_sysfs_ops; +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); -- cgit v1.2.3 From e70e13e7d4ab8f932f49db1c9500b30a34a6d420 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 4 Feb 2022 01:55:18 +0100 Subject: bpf: Implement bpf_core_types_are_compat(). Adopt libbpf's bpf_core_types_are_compat() for kernel duty by adding explicit recursion limit of 2 which is enough to handle 2 levels of function prototypes. Signed-off-by: Matteo Croce Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220204005519.60361-2-mcroce@linux.microsoft.com --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f6c43dd513fa..36bc09b8e890 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -327,6 +327,11 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( return (const struct btf_var_secinfo *)(t + 1); } +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL struct bpf_prog; -- cgit v1.2.3 From c78b8b20e34920231eda02fb40c7aca7d88be837 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Feb 2022 15:12:40 -0800 Subject: net: don't include ndisc.h from ipv6.h Nothing in ipv6.h needs ndisc.h, drop it. Link: https://lore.kernel.org/r/20220203043457.2222388-1-kuba@kernel.org Acked-by: Jeremy Kerr Acked-by: Stefan Schmidt Link: https://lore.kernel.org/r/20220203231240.2297588-1-kuba@kernel.org Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 1 - include/net/ipv6_frag.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 082f30256f59..cda1f205f391 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 0a4779175a52..5052c66e22d2 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_FRAG_H #define _IPV6_FRAG_H +#include #include #include #include -- cgit v1.2.3 From be847673cfffce8bb6e9ed6ae186081280c58831 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 2 Feb 2022 15:25:53 +0100 Subject: uapi: ioam: Insertion frequency Add the insertion frequency uapi for IOAM lwtunnels. 
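The "k over n" wording maps to a simple selection rule; the sketch below is only a plausible reading of the uapi comment (the in-kernel datapath may implement it differently, for example with atomic counters): out of every n forwarded packets, the first k get the IOAM insertion.

#include <linux/types.h>

/* Illustrative only: per-tunnel packet counter deciding insertion. */
static bool example_ioam6_should_insert(u32 k, u32 n, u32 *pkt_cnt)
{
        bool insert = (*pkt_cnt < k);

        if (++(*pkt_cnt) >= n)
                *pkt_cnt = 0;

        return insert;
}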
Signed-off-by: Justin Iurman Signed-off-by: Jakub Kicinski --- include/uapi/linux/ioam6_iptunnel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 829ffdfcacca..38f6a8fdfd34 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -41,6 +41,15 @@ enum { /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + /* Insertion frequency: + * "k over n" packets (0 < k <= n) + * [0.0001% ... 100%] + */ +#define IOAM6_IPTUNNEL_FREQ_MIN 1 +#define IOAM6_IPTUNNEL_FREQ_MAX 1000000 + IOAM6_IPTUNNEL_FREQ_K, /* u32 */ + IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From 35d39fecbc242150af5587506e58ec1f8541fb68 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 3 Feb 2022 10:44:30 +0200 Subject: net/sched: Enable tc skb ext allocation on chain miss only when needed Currently tc skb extension is used to send miss info from tc to ovs datapath module, and driver to tc. For the tc to ovs miss it is currently always allocated even if it will not be used by ovs datapath (as it depends on a requested feature). Export the static key which is used by openvswitch module to guard this code path as well, so it will be skipped if ovs datapath doesn't need it. Enable this code path once ovs datapath needs it. Signed-off-by: Paul Blakey Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 676cb8ea9e15..a3b57a93228a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload { }; }; +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc); +void tc_skb_ext_tc_enable(void); +void tc_skb_ext_tc_disable(void); +#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc) +#else /* CONFIG_NET_CLS_ACT */ +static inline void tc_skb_ext_tc_enable(void) { } +static inline void tc_skb_ext_tc_disable(void) { } +#define tc_skb_ext_tc_enabled() false +#endif + #endif -- cgit v1.2.3 From de5a1f3ce4c862150dc442530dba19e1d1dc6bc2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 4 Feb 2022 12:28:37 +0100 Subject: net: gro: minor optimization for dev_gro_receive() While inspecting some perf report, I noticed that the compiler emits suboptimal code for the napi CB initialization, fetching and storing multiple times the memory for flags bitfield. This is with gcc 10.3.1, but I observed the same with older compiler versions. We can help the compiler to do a nicer work clearing several fields at once using an u32 alias. The generated code is quite smaller, with the same number of conditional. Before: objdump -t net/core/gro.o | grep " F .text" 0000000000000bb0 l F .text 0000000000000357 dev_gro_receive After: 0000000000000bb0 l F .text 000000000000033c dev_gro_receive v1 -> v2: - use struct_group (Alexander and Alex) RFC -> v1: - use __struct_group to delimit the zeroed area (Alexander) Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/net/gro.h | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 8f75802d50fd..a765fedda5c4 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -29,46 +29,50 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; /* jiffies when first packet was created/queued */ unsigned long age; - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; + /* portion of the cb set to zero at every gro iteration */ + struct_group(zeroed, + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; - /* Used in tunnel GRO receive */ - u8 encap_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; - /* GRO checksum is valid */ - u8 csum_valid:1; + /* GRO checksum is valid */ + u8 csum_valid:1; - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; - /* Free the skb? */ - u8 free:2; + /* Free the skb? */ + u8 free:2; #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + ); /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; -- cgit v1.2.3 From 0463e320421b313db320bbcf2928ebf49f556b67 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Feb 2022 11:42:10 +0000 Subject: net: phylink: remove phylink_set_10g_modes() phylink_set_10g_modes() is no longer used with the conversion of drivers to phylink_generic_validate(), so we can remove it. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. 
Miller --- include/linux/phylink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 713a0c928b7c..cca149f78d35 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -582,7 +582,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, -- cgit v1.2.3 From 145c7a793838add5e004e7d49a67654dc7eba147 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Feb 2022 12:15:45 -0800 Subject: ipv6: make mc_forwarding atomic This fixes minor data-races in ip6_mc_input() and batadv_mcast_mla_rtr_flags_softif_get_ipv6() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1e0f8a31f3de..16870f86c74d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; -- cgit v1.2.3 From e3ececfe668facd87d920b608349a32607060e66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Feb 2022 14:42:35 -0800 Subject: ref_tracker: implement use-after-free detection Whenever ref_tracker_dir_init() is called, mark the struct ref_tracker_dir as dead. Test the dead status from ref_tracker_alloc() and ref_tracker_free() This should detect buggy dev_put()/dev_hold() happening too late in netdevice dismantle process. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ref_tracker.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 60f3453be23e..a443abda937d 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,7 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ #endif @@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, INIT_LIST_HEAD(&dir->quarantine); spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; + dir->dead = false; refcount_set(&dir->untracked, 1); stack_depot_init(); } -- cgit v1.2.3 From 8fd5522f44dcd7f05454ddc4f16d0f821b676cd9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Feb 2022 14:42:36 -0800 Subject: ref_tracker: add a count of untracked references We are still chasing a netdev refcount imbalance, and we suspect we have one rogue dev_put() that is consuming a reference taken from a dev_hold_track() To detect this case, allow ref_tracker_alloc() and ref_tracker_free() to be called with a NULL @trackerp parameter, and use a dedicated refcount_t just for them. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/ref_tracker.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index a443abda937d..9ca353ab712b 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,7 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + refcount_t no_tracker; bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ @@ -29,6 +30,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, dir->quarantine_avail = quarantine_count; dir->dead = false; refcount_set(&dir->untracked, 1); + refcount_set(&dir->no_tracker, 1); stack_depot_init(); } -- cgit v1.2.3 From 4c6c11ea0f7b00a1894803efe980dfaf3b074886 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Feb 2022 14:42:37 -0800 Subject: net: refine dev_put()/dev_hold() debugging We are still chasing some syzbot reports where we think a rogue dev_put() is called with no corresponding prior dev_hold(). Unfortunately it eats a reference on dev->dev_refcnt taken by innocent dev_hold_track(), meaning that the refcount saturation splat comes too late to be useful. Make sure that 'not tracked' dev_put() and dev_hold() better use CONFIG_NET_DEV_REFCNT_TRACKER=y debug infrastructure: Prior patch in the series allowed ref_tracker_alloc() and ref_tracker_free() to be called with a NULL @trackerp parameter, and to use a separate refcount only to detect too many put() even in the following case: dev_hold_track(dev, tracker_1, GFP_ATOMIC); dev_hold(dev); dev_put(dev); dev_put(dev); // Should complain loudly here. dev_put_track(dev, tracker_1); // instead of here Add clarification about netdev_tracker_alloc() role. v2: I replaced the dev_put() in linkwatch_do_dev() with __dev_put() because callers called netdev_tracker_free(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 69 +++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e490b84732d1..3fb6fb67ed77 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3817,14 +3817,7 @@ extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); -/** - * dev_put - release reference to device - * @dev: network device - * - * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. - */ -static inline void dev_put(struct net_device *dev) +static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3835,14 +3828,7 @@ static inline void dev_put(struct net_device *dev) } } -/** - * dev_hold - get reference to device - * @dev: network device - * - * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. 
- */ -static inline void dev_hold(struct net_device *dev) +static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3853,11 +3839,24 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void __netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, + gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +/* netdev_tracker_alloc() can upgrade a prior untracked reference + * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. + */ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER - ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); + refcount_dec(&dev->refcnt_tracker.no_tracker); + __netdev_tracker_alloc(dev, tracker, gfp); #endif } @@ -3873,8 +3872,8 @@ static inline void dev_hold_track(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { - dev_hold(dev); - netdev_tracker_alloc(dev, tracker, gfp); + __dev_hold(dev); + __netdev_tracker_alloc(dev, tracker, gfp); } } @@ -3883,10 +3882,34 @@ static inline void dev_put_track(struct net_device *dev, { if (dev) { netdev_tracker_free(dev, tracker); - dev_put(dev); + __dev_put(dev); } } +/** + * dev_hold - get reference to device + * @dev: network device + * + * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. + */ +static inline void dev_hold(struct net_device *dev) +{ + dev_hold_track(dev, NULL, GFP_ATOMIC); +} + +/** + * dev_put - release reference to device + * @dev: network device + * + * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. + */ +static inline void dev_put(struct net_device *dev) +{ + dev_put_track(dev, NULL); +} + static inline void dev_replace_track(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, @@ -3895,11 +3918,11 @@ static inline void dev_replace_track(struct net_device *odev, if (odev) netdev_tracker_free(odev, tracker); - dev_hold(ndev); - dev_put(odev); + __dev_hold(ndev); + __dev_put(odev); if (ndev) - netdev_tracker_alloc(ndev, tracker, gfp); + __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on -- cgit v1.2.3 From 5a8fb33e530512ee67a11b30f3451a4f030f4b01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Feb 2022 20:56:14 -0800 Subject: skmsg: convert struct sk_msg_sg::copy to a bitmap We have plans for increasing MAX_SKB_FRAGS, but sk_msg_sg::copy is currently an unsigned long, limiting MAX_SKB_FRAGS to 30 on 32bit arches. Convert it to a bitmap, as Jakub suggested. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skmsg.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb..1ff68a88c58d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,7 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - unsigned long copy; + DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2); /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). 
The crypto APIs require the @@ -38,7 +38,6 @@ struct sk_msg_sg { */ struct scatterlist data[MAX_MSG_FRAGS + 2]; }; -static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -234,7 +233,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (test_bit(msg->sg.start, &msg->sg.copy)) { + if (test_bit(msg->sg.start, msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -253,7 +252,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - __set_bit(msg->sg.end, &msg->sg.copy); + __set_bit(msg->sg.end, msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -262,9 +261,9 @@ static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { if (copy_state) - __set_bit(i, &msg->sg.copy); + __set_bit(i, msg->sg.copy); else - __clear_bit(i, &msg->sg.copy); + __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; -- cgit v1.2.3 From 9c1be1935fb68b2413796cdc03d019b8cf35ab51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Feb 2022 09:01:25 -0800 Subject: net: initialize init_net earlier While testing a patch that will follow later ("net: add netns refcount tracker to struct nsproxy") I found that devtmpfs_init() was called before init_net was initialized. This is a bug, because devtmpfs_setup() calls ksys_unshare(CLONE_NEWNS); This has the effect of increasing init_net refcount, which will be later overwritten to 1, as part of setup_net(&init_net) We had too many prior patches [1] trying to work around the root cause. Really, make sure init_net is in BSS section, and that net_ns_init() is called earlier at boot time. Note that another patch ("vfs: add netns refcount tracker to struct fs_context") also will need net_ns_init() being called before vfs_caches_init() As a bonus, this patch saves around 4KB in .data section. [1] f8c46cb39079 ("netns: do not call pernet ops for not yet set up init_net namespace") b5082df8019a ("net: Initialise init_net.count to 1") 734b65417b24 ("net: Statically initialize init_net.dev_base_head") v2: fixed a build error reported by kernel build bots (CONFIG_NET=n) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5b61c462e534..374cc7b260fc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ -- cgit v1.2.3 From 88590b369354092183bcba04e2368010c462557f Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:33 +0800 Subject: net: skb_drop_reason: add document for drop reasons Add document for following existing drop reasons: SKB_DROP_REASON_NOT_SPECIFIED SKB_DROP_REASON_NO_SOCKET SKB_DROP_REASON_PKT_TOO_SMALL SKB_DROP_REASON_TCP_CSUM SKB_DROP_REASON_SOCKET_FILTER SKB_DROP_REASON_UDP_CSUM Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a27bcc4f7e9a..f04e3a1f4455 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,12 +314,12 @@ struct sk_buff; * used to translate the reason to string. */ enum skb_drop_reason { - SKB_DROP_REASON_NOT_SPECIFIED, - SKB_DROP_REASON_NO_SOCKET, - SKB_DROP_REASON_PKT_TOO_SMALL, - SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_SOCKET_FILTER, - SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ SKB_DROP_REASON_MAX, }; -- cgit v1.2.3 From 2df3041ba3be950376e8c25a8f6da22f7fcc765c Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:34 +0800 Subject: net: netfilter: use kfree_drop_reason() for NF_DROP Replace kfree_skb() with kfree_skb_reason() in nf_hook_slow() when skb is dropped by reason of NF_DROP. Following new drop reasons are introduced: SKB_DROP_REASON_NETFILTER_DROP Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f04e3a1f4455..9060159b4375 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -320,6 +320,7 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a8a64b97504d..3d89f7b09a43 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -16,6 +16,7 @@ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ + EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 33cba42985c8144eef78d618fc1e51aaa074b169 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:35 +0800 Subject: net: ipv4: use kfree_skb_reason() in ip_rcv_core() Replace kfree_skb() with kfree_skb_reason() in ip_rcv_core(). Three new drop reasons are introduced: SKB_DROP_REASON_OTHERHOST SKB_DROP_REASON_IP_CSUM SKB_DROP_REASON_IP_INHDR Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 9 +++++++++ include/trace/events/skb.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9060159b4375..8e82130b3c52 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -321,6 +321,15 @@ enum skb_drop_reason { SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current + * host (interface is in promisc + * mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3d89f7b09a43..f2b1778485f0 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -17,6 +17,9 @@ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ + EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ + EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ + EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From c1f166d1f7eef212096a98b22f5acf92f9af353d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:36 +0800 Subject: net: ipv4: use kfree_skb_reason() in ip_rcv_finish_core() Replace kfree_skb() with kfree_skb_reason() in ip_rcv_finish_core(), following drop reasons are introduced: SKB_DROP_REASON_IP_RPFILTER SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ include/trace/events/skb.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8e82130b3c52..4baba45f223d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -330,6 +330,15 @@ enum skb_drop_reason { * IP header (see * IPSTATS_MIB_INHDRERRORS) */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. + * see the document for rp_filter + * in ip-sysctl.rst for more + * information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index f2b1778485f0..485a1d3034a4 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -20,6 +20,9 @@ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ + EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ + EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ + UNICAST_IN_L2_MULTICAST) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 10580c4791902571777603cb980414892dd5f149 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:37 +0800 Subject: net: ipv4: use kfree_skb_reason() in ip_protocol_deliver_rcu() Replace kfree_skb() with kfree_skb_reason() in ip_protocol_deliver_rcu(). Following new drop reasons are introduced: SKB_DROP_REASON_XFRM_POLICY SKB_DROP_REASON_IP_NOPROTO Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ include/trace/events/skb.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4baba45f223d..2a64afa97910 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -339,6 +339,8 @@ enum skb_drop_reason { * is multicast, but L3 is * unicast. */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 485a1d3034a4..985e481c092d 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -23,6 +23,8 @@ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ UNICAST_IN_L2_MULTICAST) \ + EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ + EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 08d4c0370c400fa6ef2194f9ee2e8dccc4a7ab39 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:39 +0800 Subject: net: udp: use kfree_skb_reason() in __udp_queue_rcv_skb() Replace kfree_skb() with kfree_skb_reason() in __udp_queue_rcv_skb(). The following new drop reasons are introduced: SKB_DROP_REASON_SOCKET_RCVBUFF SKB_DROP_REASON_PROTO_MEM Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ include/trace/events/skb.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2a64afa97910..a5adbf6b51e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -341,6 +341,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as + * udp packet drop out of + * udp_memory_allocated. + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 985e481c092d..cfcfd26399f7 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -25,6 +25,8 @@ UNICAST_IN_L2_MULTICAST) \ EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ + EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ + EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3
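To illustrate how the new drop reasons above are meant to be consumed, here is a minimal sketch of a receive helper that reports why an skb was dropped instead of calling plain kfree_skb(). Only kfree_skb_reason(), enum skb_drop_reason and the reason codes come from the patches above; the function name and the errno-to-reason mapping are illustrative assumptions, not code from any of these commits.

#include <linux/skbuff.h>
#include <net/sock.h>

/* Hedged sketch: report an explicit drop reason on a receive failure path.
 * The errno-to-reason mapping here is an assumption made for illustration.
 */
static int example_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        enum skb_drop_reason reason;
        int rc;

        rc = sock_queue_rcv_skb(sk, skb);
        if (rc < 0) {
                reason = (rc == -ENOMEM) ? SKB_DROP_REASON_SOCKET_RCVBUFF :
                                           SKB_DROP_REASON_PROTO_MEM;
                kfree_skb_reason(skb, reason);  /* drop with an explicit reason */
                return -1;
        }

        return 0;
}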
From fda17afc6166e975bec1197bd94cd2a3317bce3f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 7 Feb 2022 11:27:53 +0900 Subject: ata: libata-core: Fix ata_dev_config_cpr() The concurrent positioning ranges log page 47h is a general purpose log page and not a subpage of the identify device log. Using ata_identify_page_supported() to test for concurrent positioning ranges support is thus wrong. ata_log_supported() must be used. Furthermore, unlike other advanced ATA features (e.g. NCQ priority), accesses to the concurrent positioning ranges log page are not gated by a feature bit from the device IDENTIFY data. Since many older drives react badly to the READ LOG EXT and/or READ LOG DMA EXT commands issued to read device log pages, avoid problems with older drives by limiting the concurrent positioning ranges support detection to drives implementing at least the ACS-4 ATA standard (major version 11). This additional condition effectively turns ata_dev_config_cpr() into a nop for older drives, avoiding problems in the field. Fixes: fe22e1c2f705 ("libata: support concurrent positioning ranges log") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215519 Cc: stable@vger.kernel.org Reviewed-by: Hannes Reinecke Tested-by: Abderraouf Adjal Signed-off-by: Damien Le Moal --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index 199e47e97d64..21292b5bbb55 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -324,12 +324,12 @@ enum { ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, -- cgit v1.2.3 From ad5e35f58384179bbd3cdb349904e150f364c4f3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 28 Jan 2022 12:34:14 +0100 Subject: mtd: Replace the expert mode symbols with a single helper Reduce the number of exported symbols by replacing: - mtd_expert_analysis_warning (the error string) - mtd_expert_analysis_mode (the boolean) with a single helper: - mtd_check_expert_analysis_mode Calling this helper will both check/return the content of the internal boolean - which is not exported anymore - and conditionally WARN_ONCE() the user, as was done before. While at it, make the error string local to the helper and set it const. Only export this helper when CONFIG_DEBUG_FS is defined, so that a debug-only feature does not grow the size of production kernels. Mechanically update all the consumers. Suggested-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220128113414.1121924-1-miquel.raynal@bootlin.com --- include/linux/mtd/mtd.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 1ffa933121f6..1b3fc8c71ab3 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -711,7 +711,11 @@ static inline int mtd_is_bitflip_or_eccerr(int err) { unsigned mtd_mmap_capabilities(struct mtd_info *mtd); -extern char *mtd_expert_analysis_warning; -extern bool mtd_expert_analysis_mode; +#ifdef CONFIG_DEBUG_FS +bool mtd_check_expert_analysis_mode(void); +#else +static inline bool mtd_check_expert_analysis_mode(void) { return false; } +#endif + #endif /* __MTD_MTD_H__ */ -- cgit v1.2.3 From 74703b13f9d2ef286ef588f29295a2fd30b5f295 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Nov 2021 19:58:42 +0000 Subject: dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts Advertise the two pseudo-interrupts that are tied to the two PMU flavours present in the Apple M1 SoC. We choose to expose two different pseudo-interrupts to the OS as the e-core PMU is obviously different from the p-core one, effectively presenting two different devices.
Acked-by: Rob Herring Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- include/dt-bindings/interrupt-controller/apple-aic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h index 604f2bb30ac0..bf3aac0e5491 100644 --- a/include/dt-bindings/interrupt-controller/apple-aic.h +++ b/include/dt-bindings/interrupt-controller/apple-aic.h @@ -11,5 +11,7 @@ #define AIC_TMR_HV_VIRT 1 #define AIC_TMR_GUEST_PHYS 2 #define AIC_TMR_GUEST_VIRT 3 +#define AIC_CPU_PMU_E 4 +#define AIC_CPU_PMU_P 5 #endif -- cgit v1.2.3 From 6bf625a4140f24b490766043b307f8252519578b Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 6 Feb 2022 11:36:56 -0800 Subject: Drivers: hv: vmbus: Rework use of DMA_BIT_MASK(64) Using DMA_BIT_MASK(64) as an initializer for a global variable causes problems with Clang 12.0.1. The compiler doesn't understand that value 64 is excluded from the shift at compile time, resulting in a build error. While this is a compiler problem, avoid the issue by setting up the dma_mask memory as part of struct hv_device, and initialize it using dma_set_mask(). Reported-by: Nathan Chancellor Reported-by: Vitaly Chikunov Reported-by: Jakub Kicinski Fixes: 743b237c3a7b ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Michael Kelley Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/1644176216-12531-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f565a8938836..fe2e0179ed51 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1262,6 +1262,7 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; struct device_dma_parameters dma_parms; + u64 dma_mask; /* place holder to keep track of the dir for hv device in debugfs */ struct dentry *debug_dir; -- cgit v1.2.3 From cb1f65c1e1424a4b5e4a86da8aa3b8fd8459c8ec Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Feb 2022 18:35:22 +0100 Subject: PM: s2idle: ACPI: Fix wakeup interrupts handling After commit e3728b50cd9b ("ACPI: PM: s2idle: Avoid possible race related to the EC GPE") wakeup interrupts occurring immediately after the one discarded by acpi_s2idle_wake() may be missed. Moreover, if the SCI triggers again immediately after the rearming in acpi_s2idle_wake(), that wakeup may be missed too. The problem is that pm_system_irq_wakeup() only calls pm_system_wakeup() when pm_wakeup_irq is 0, but that's not the case any more after the interrupt causing acpi_s2idle_wake() to run until pm_wakeup_irq is cleared by the pm_wakeup_clear() call in s2idle_loop(). However, there may be wakeup interrupts occurring in that time frame and if that happens, they will be missed. To address that issue, first move the clearing of pm_wakeup_irq to the point at which it is known that the interrupt causing acpi_s2idle_wake() to run will be discarded, before rearming the SCI for wakeup. Moreover, because that only reduces the size of the time window in which the issue may manifest itself, allow pm_system_irq_wakeup() to register two wakeup interrupts in a row and, when discarding the first one, replace it with the second one.
[Of course, this assumes that only one wakeup interrupt can be discarded in one go, but currently that is the case and I am not aware of any plans to change that.] Fixes: e3728b50cd9b ("ACPI: PM: s2idle: Avoid possible race related to the EC GPE") Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3e8ecdebe601..300273ff40cc 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -497,14 +497,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); -- cgit v1.2.3 From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 24 Aug 2021 16:13:41 -0400 Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on the same or a remote machine, but it expects processes that have a device file (e.g. a GPU) associated with them to provide the necessary driver support to assist CRIU and its extensible plugin interface. Thus, in order to support checkpoint-restore of any ROCm process, the AMD Radeon Open Compute kernel driver needs to provide a set of new APIs that provide the necessary VRAM metadata and its contents to a userspace component (the CRIU plugin) that can store it in the form of image files. This introduces some new ioctls which will be used to checkpoint-restore any KFD-bound user process. KFD only allows ioctl calls from the same process that opened the KFD file descriptor. Since these ioctls are expected to be called from a KFD CRIU plugin, which has elevated ptrace-attach privileges and the CAP_CHECKPOINT_RESTORE capability attached to the file descriptors, modify KFD to allow such calls. (API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3.
UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. + * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- cgit v1.2.3 From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ 
-32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 7938f4218168ae9fc4bdddb15976f9ebbae41999 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 4 Feb 2022 09:05:41 -0800 Subject: dma-buf-map: Rename to iosys-map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename struct dma_buf_map to struct iosys_map and corresponding APIs. Over time dma-buf-map grew to provide more functionality than what dma-buf itself uses: in fact it's just a shim layer to abstract system memory, that can be accessed via regular load and store, from IO memory that needs to be accessed via arch helpers. The idea is to extend this API so it can fulfill other needs, internal to a single driver. Example: in the i915 driver it's desired to share the implementation for integrated graphics, which uses mostly system memory, with discrete graphics, which may need to access IO memory. The conversion was mostly done with the following semantic patch: @r1@ @@ - struct dma_buf_map + struct iosys_map @r2@ @@ ( - DMA_BUF_MAP_INIT_VADDR + IOSYS_MAP_INIT_VADDR | - dma_buf_map_set_vaddr + iosys_map_set_vaddr | - dma_buf_map_set_vaddr_iomem + iosys_map_set_vaddr_iomem | - dma_buf_map_is_equal + iosys_map_is_equal | - dma_buf_map_is_null + iosys_map_is_null | - dma_buf_map_is_set + iosys_map_is_set | - dma_buf_map_clear + iosys_map_clear | - dma_buf_map_memcpy_to + iosys_map_memcpy_to | - dma_buf_map_incr + iosys_map_incr ) @@ @@ - #include + #include Then some files had their includes adjusted and some comments were updated to remove mentions of dma-buf-map. Since this is not specific to dma-buf anymore, move the documentation to the "Bus-Independent Device Accesses" section.
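For orientation, the renamed helpers keep the same usage pattern as before; the sketch below is illustrative only (the buffer pointers and the function name are made up for the example) and simply mirrors the documentation carried in the new header.

#include <linux/iosys-map.h>

/* Hedged sketch: fill a mapping that may live in system or in I/O memory.
 * The helpers pick memcpy() or memcpy_toio() as appropriate.
 */
static void example_fill(void *sys_buf, void __iomem *io_buf,
                         const void *src, size_t len, bool use_iomem)
{
        struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);

        if (use_iomem)
                iosys_map_set_vaddr_iomem(&map, io_buf);        /* I/O memory */
        else
                iosys_map_set_vaddr(&map, sys_buf);             /* system memory */

        if (iosys_map_is_null(&map))
                return;

        iosys_map_memcpy_to(&map, src, len);
        iosys_map_incr(&map, len);      /* advance past the copied bytes */
}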
v2: - Squash patches v3: - Fix wrong removal of dma-buf.h from MAINTAINERS - Move documentation from dma-buf.rst to device-io.rst v4: - Change documentation title and level Signed-off-by: Lucas De Marchi Acked-by: Christian König Acked-by: Sumit Semwal Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220204170541.829227-1-lucas.demarchi@intel.com --- include/drm/drm_cache.h | 6 +- include/drm/drm_client.h | 7 +- include/drm/drm_gem.h | 6 +- include/drm/drm_gem_atomic_helper.h | 6 +- include/drm/drm_gem_cma_helper.h | 6 +- include/drm/drm_gem_framebuffer_helper.h | 8 +- include/drm/drm_gem_shmem_helper.h | 12 +- include/drm/drm_gem_ttm_helper.h | 6 +- include/drm/drm_gem_vram_helper.h | 9 +- include/drm/drm_prime.h | 6 +- include/drm/ttm/ttm_bo_api.h | 10 +- include/drm/ttm/ttm_kmap_iter.h | 10 +- include/drm/ttm/ttm_resource.h | 6 +- include/linux/dma-buf-map.h | 266 ------------------------------- include/linux/dma-buf.h | 12 +- include/linux/iosys-map.h | 257 +++++++++++++++++++++++++++++ 16 files changed, 316 insertions(+), 317 deletions(-) delete mode 100644 include/linux/dma-buf-map.h create mode 100644 include/linux/iosys-map.h (limited to 'include') diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index cc9de1632dd3..22deb216b59c 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,7 +35,7 @@ #include -struct dma_buf_map; +struct iosys_map; void drm_clflush_pages(struct page *pages[], unsigned long num_pages); void drm_clflush_sg(struct sg_table *st); @@ -74,7 +74,7 @@ static inline bool drm_arch_can_wc_memory(void) void drm_memcpy_init_early(void); -void drm_memcpy_from_wc(struct dma_buf_map *dst, - const struct dma_buf_map *src, +void drm_memcpy_from_wc(struct iosys_map *dst, + const struct iosys_map *src, unsigned long len); #endif diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index f07f2fb02e75..4fc8018eddda 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -3,7 +3,7 @@ #ifndef _DRM_CLIENT_H_ #define _DRM_CLIENT_H_ -#include +#include #include #include #include @@ -144,7 +144,7 @@ struct drm_client_buffer { /** * @map: Virtual address for the buffer */ - struct dma_buf_map map; + struct iosys_map map; /** * @fb: DRM framebuffer @@ -156,7 +156,8 @@ struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format); void drm_client_framebuffer_delete(struct drm_client_buffer *buffer); int drm_client_framebuffer_flush(struct drm_client_buffer *buffer, struct drm_rect *rect); -int drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map); +int drm_client_buffer_vmap(struct drm_client_buffer *buffer, + struct iosys_map *map); void drm_client_buffer_vunmap(struct drm_client_buffer *buffer); int drm_client_modeset_create(struct drm_client_dev *client); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 35e7f44c2a75..e2941cee14b6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -39,7 +39,7 @@ #include -struct dma_buf_map; +struct iosys_map; struct drm_gem_object; /** @@ -139,7 +139,7 @@ struct drm_gem_object_funcs { * * This callback is optional. */ - int (*vmap)(struct drm_gem_object *obj, struct dma_buf_map *map); + int (*vmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @vunmap: @@ -149,7 +149,7 @@ struct drm_gem_object_funcs { * * This callback is optional. 
*/ - void (*vunmap)(struct drm_gem_object *obj, struct dma_buf_map *map); + void (*vunmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @mmap: diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index 0b1e2dd2ac3f..6e3319e9001a 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -3,7 +3,7 @@ #ifndef __DRM_GEM_ATOMIC_HELPER_H__ #define __DRM_GEM_ATOMIC_HELPER_H__ -#include +#include #include #include @@ -59,7 +59,7 @@ struct drm_shadow_plane_state { * The memory mappings stored in map should be established in the plane's * prepare_fb callback and removed in the cleanup_fb callback. */ - struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct iosys_map map[DRM_FORMAT_MAX_PLANES]; /** * @data: Address of each framebuffer BO's data @@ -67,7 +67,7 @@ struct drm_shadow_plane_state { * The address of the data stored in each mapping. This is different * for framebuffers with non-zero offset fields. */ - struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; + struct iosys_map data[DRM_FORMAT_MAX_PLANES]; }; /** diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index adb507a9dbf0..fbda4ce5d5fb 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -38,7 +38,8 @@ void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj); void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj, struct drm_printer *p, unsigned int indent); struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj); -int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map); +int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, + struct iosys_map *map); int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma); extern const struct vm_operations_struct drm_gem_cma_vm_ops; @@ -106,7 +107,8 @@ static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_ob * Returns: * 0 on success or a negative error code on failure. 
*/ -static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 905727719ead..1091e4fa08cb 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -2,7 +2,7 @@ #define __DRM_GEM_FB_HELPER_H__ #include -#include +#include #include @@ -40,10 +40,10 @@ drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_gem_fb_vmap(struct drm_framebuffer *fb, - struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES], - struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]); + struct iosys_map map[static DRM_FORMAT_MAX_PLANES], + struct iosys_map data[DRM_FORMAT_MAX_PLANES]); void drm_gem_fb_vunmap(struct drm_framebuffer *fb, - struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES]); + struct iosys_map map[static DRM_FORMAT_MAX_PLANES]); int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 311d66c9cf4b..68347b63fc71 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -113,8 +113,10 @@ int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); +int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); +void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); @@ -226,7 +228,8 @@ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_ * Returns: * 0 on success or a negative error code on failure. */ -static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); @@ -241,7 +244,8 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct d * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should * use it as their &drm_gem_object_funcs.vunmap handler. 
*/ -static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); diff --git a/include/drm/drm_gem_ttm_helper.h b/include/drm/drm_gem_ttm_helper.h index 78040f6cc6f3..4c003b4f173e 100644 --- a/include/drm/drm_gem_ttm_helper.h +++ b/include/drm/drm_gem_ttm_helper.h @@ -10,7 +10,7 @@ #include #include -struct dma_buf_map; +struct iosys_map; #define drm_gem_ttm_of_gem(gem_obj) \ container_of(gem_obj, struct ttm_buffer_object, base) @@ -18,9 +18,9 @@ struct dma_buf_map; void drm_gem_ttm_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *gem); int drm_gem_ttm_vmap(struct drm_gem_object *gem, - struct dma_buf_map *map); + struct iosys_map *map); void drm_gem_ttm_vunmap(struct drm_gem_object *gem, - struct dma_buf_map *map); + struct iosys_map *map); int drm_gem_ttm_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma); diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index b4ce27a72773..c083a1d71cf4 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -12,7 +12,7 @@ #include #include -#include +#include struct drm_mode_create_dumb; struct drm_plane; @@ -51,7 +51,7 @@ struct vm_area_struct; */ struct drm_gem_vram_object { struct ttm_buffer_object bo; - struct dma_buf_map map; + struct iosys_map map; /** * @vmap_use_count: @@ -97,8 +97,9 @@ void drm_gem_vram_put(struct drm_gem_vram_object *gbo); s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo); int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag); int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo); -int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct dma_buf_map *map); -void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, struct dma_buf_map *map); +int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct iosys_map *map); +void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, + struct iosys_map *map); int drm_gem_vram_fill_create_dumb(struct drm_file *file, struct drm_device *dev, diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 54f2c58305d2..2a1d01e5b56b 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -54,7 +54,7 @@ struct device; struct dma_buf_export_info; struct dma_buf; struct dma_buf_attachment; -struct dma_buf_map; +struct iosys_map; enum dma_data_direction; @@ -83,8 +83,8 @@ struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir); -int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map); -void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct dma_buf_map *map); +int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map); +void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map); int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index c17b2df9178b..155b19ee12fb 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -47,7 +47,7 @@ struct ttm_global; struct ttm_device; -struct dma_buf_map; +struct iosys_map; struct drm_mm_node; @@ -481,17 +481,17 @@ 
void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); * ttm_bo_vmap * * @bo: The buffer object. - * @map: pointer to a struct dma_buf_map representing the map. + * @map: pointer to a struct iosys_map representing the map. * * Sets up a kernel virtual mapping, using ioremap or vmap to the * data in the buffer object. The parameter @map returns the virtual - * address as struct dma_buf_map. Unmap the buffer with ttm_bo_vunmap(). + * address as struct iosys_map. Unmap the buffer with ttm_bo_vunmap(). * * Returns * -ENOMEM: Out of memory. * -EINVAL: Invalid range. */ -int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); +int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); /** * ttm_bo_vunmap @@ -501,7 +501,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); * * Unmaps a kernel map set up by ttm_bo_vmap(). */ -void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); +void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); /** * ttm_bo_mmap_obj - mmap memory backed by a ttm buffer object. diff --git a/include/drm/ttm/ttm_kmap_iter.h b/include/drm/ttm/ttm_kmap_iter.h index 8bb00fd39d6c..cc5c09a211b4 100644 --- a/include/drm/ttm/ttm_kmap_iter.h +++ b/include/drm/ttm/ttm_kmap_iter.h @@ -8,7 +8,7 @@ #include struct ttm_kmap_iter; -struct dma_buf_map; +struct iosys_map; /** * struct ttm_kmap_iter_ops - Ops structure for a struct @@ -24,22 +24,22 @@ struct ttm_kmap_iter_ops { * kmap_local semantics. * @res_iter: Pointer to the struct ttm_kmap_iter representing * the resource. - * @dmap: The struct dma_buf_map holding the virtual address after + * @dmap: The struct iosys_map holding the virtual address after * the operation. * @i: The location within the resource to map. PAGE_SIZE granularity. */ void (*map_local)(struct ttm_kmap_iter *res_iter, - struct dma_buf_map *dmap, pgoff_t i); + struct iosys_map *dmap, pgoff_t i); /** * unmap_local() - Unmap a PAGE_SIZE part of the resource previously * mapped using kmap_local. * @res_iter: Pointer to the struct ttm_kmap_iter representing * the resource. - * @dmap: The struct dma_buf_map holding the virtual address after + * @dmap: The struct iosys_map holding the virtual address after * the operation. */ void (*unmap_local)(struct ttm_kmap_iter *res_iter, - struct dma_buf_map *dmap); + struct iosys_map *dmap); bool maps_tt; }; diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 5952051091cd..6eae09e382d2 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include @@ -41,7 +41,7 @@ struct ttm_resource; struct ttm_place; struct ttm_buffer_object; struct ttm_placement; -struct dma_buf_map; +struct iosys_map; struct io_mapping; struct sg_table; struct scatterlist; @@ -207,7 +207,7 @@ struct ttm_kmap_iter_iomap { */ struct ttm_kmap_iter_linear_io { struct ttm_kmap_iter base; - struct dma_buf_map dmap; + struct iosys_map dmap; bool needs_unmap; }; diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h deleted file mode 100644 index 278d489e4bdd..000000000000 --- a/include/linux/dma-buf-map.h +++ /dev/null @@ -1,266 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Pointer to dma-buf-mapped memory, plus helpers. - */ - -#ifndef __DMA_BUF_MAP_H__ -#define __DMA_BUF_MAP_H__ - -#include -#include - -/** - * DOC: overview - * - * Calling dma-buf's vmap operation returns a pointer to the buffer's memory. 
- * Depending on the location of the buffer, users may have to access it with - * I/O operations or memory load/store operations. For example, copying to - * system memory could be done with memcpy(), copying to I/O memory would be - * done with memcpy_toio(). - * - * .. code-block:: c - * - * void *vaddr = ...; // pointer to system memory - * memcpy(vaddr, src, len); - * - * void *vaddr_iomem = ...; // pointer to I/O memory - * memcpy_toio(vaddr, _iomem, src, len); - * - * When using dma-buf's vmap operation, the returned pointer is encoded as - * :c:type:`struct dma_buf_map `. - * :c:type:`struct dma_buf_map ` stores the buffer's address in - * system or I/O memory and a flag that signals the required method of - * accessing the buffer. Use the returned instance and the helper functions - * to access the buffer's memory in the correct way. - * - * The type :c:type:`struct dma_buf_map ` and its helpers are - * actually independent from the dma-buf infrastructure. When sharing buffers - * among devices, drivers have to know the location of the memory to access - * the buffers in a safe way. :c:type:`struct dma_buf_map ` - * solves this problem for dma-buf and its users. If other drivers or - * sub-systems require similar functionality, the type could be generalized - * and moved to a more prominent header file. - * - * Open-coding access to :c:type:`struct dma_buf_map ` is - * considered bad style. Rather then accessing its fields directly, use one - * of the provided helper functions, or implement your own. For example, - * instances of :c:type:`struct dma_buf_map ` can be initialized - * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with - * dma_buf_map_set_vaddr(). These helpers will set an address in system memory. - * - * .. code-block:: c - * - * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); - * - * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); - * - * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). - * - * .. code-block:: c - * - * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); - * - * Instances of struct dma_buf_map do not have to be cleaned up, but - * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings - * always refer to system memory. - * - * .. code-block:: c - * - * dma_buf_map_clear(&map); - * - * Test if a mapping is valid with either dma_buf_map_is_set() or - * dma_buf_map_is_null(). - * - * .. code-block:: c - * - * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map)) - * // always true - * - * Instances of :c:type:`struct dma_buf_map ` can be compared - * for equality with dma_buf_map_is_equal(). Mappings the point to different - * memory spaces, system or I/O, are never equal. That's even true if both - * spaces are located in the same address space, both mappings contain the - * same address value, or both mappings refer to NULL. - * - * .. code-block:: c - * - * struct dma_buf_map sys_map; // refers to system memory - * struct dma_buf_map io_map; // refers to I/O memory - * - * if (dma_buf_map_is_equal(&sys_map, &io_map)) - * // always false - * - * A set up instance of struct dma_buf_map can be used to access or manipulate - * the buffer memory. Depending on the location of the memory, the provided - * helpers will pick the correct operations. Data can be copied into the memory - * with dma_buf_map_memcpy_to(). The address can be manipulated with - * dma_buf_map_incr(). - * - * .. 
code-block:: c - * - * const void *src = ...; // source buffer - * size_t len = ...; // length of src - * - * dma_buf_map_memcpy_to(&map, src, len); - * dma_buf_map_incr(&map, len); // go to first byte after the memcpy - */ - -/** - * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. - * @vaddr_iomem: The buffer's address if in I/O memory - * @vaddr: The buffer's address if in system memory - * @is_iomem: True if the dma-buf memory is located in I/O - * memory, or false otherwise. - */ -struct dma_buf_map { - union { - void __iomem *vaddr_iomem; - void *vaddr; - }; - bool is_iomem; -}; - -/** - * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory - * @vaddr_: A system-memory address - */ -#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ - { \ - .vaddr = (vaddr_), \ - .is_iomem = false, \ - } - -/** - * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory - * @map: The dma-buf mapping structure - * @vaddr: A system-memory address - * - * Sets the address and clears the I/O-memory flag. - */ -static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) -{ - map->vaddr = vaddr; - map->is_iomem = false; -} - -/** - * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory - * @map: The dma-buf mapping structure - * @vaddr_iomem: An I/O-memory address - * - * Sets the address and the I/O-memory flag. - */ -static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map, - void __iomem *vaddr_iomem) -{ - map->vaddr_iomem = vaddr_iomem; - map->is_iomem = true; -} - -/** - * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality - * @lhs: The dma-buf mapping structure - * @rhs: A dma-buf mapping structure to compare with - * - * Two dma-buf mapping structures are equal if they both refer to the same type of memory - * and to the same address within that memory. - * - * Returns: - * True is both structures are equal, or false otherwise. - */ -static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs, - const struct dma_buf_map *rhs) -{ - if (lhs->is_iomem != rhs->is_iomem) - return false; - else if (lhs->is_iomem) - return lhs->vaddr_iomem == rhs->vaddr_iomem; - else - return lhs->vaddr == rhs->vaddr; -} - -/** - * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL - * @map: The dma-buf mapping structure - * - * Depending on the state of struct dma_buf_map.is_iomem, tests if the - * mapping is NULL. - * - * Returns: - * True if the mapping is NULL, or false otherwise. - */ -static inline bool dma_buf_map_is_null(const struct dma_buf_map *map) -{ - if (map->is_iomem) - return !map->vaddr_iomem; - return !map->vaddr; -} - -/** - * dma_buf_map_is_set - Tests is the dma-buf mapping has been set - * @map: The dma-buf mapping structure - * - * Depending on the state of struct dma_buf_map.is_iomem, tests if the - * mapping has been set. - * - * Returns: - * True if the mapping is been set, or false otherwise. - */ -static inline bool dma_buf_map_is_set(const struct dma_buf_map *map) -{ - return !dma_buf_map_is_null(map); -} - -/** - * dma_buf_map_clear - Clears a dma-buf mapping structure - * @map: The dma-buf mapping structure - * - * Clears all fields to zero; including struct dma_buf_map.is_iomem. So - * mapping structures that were set to point to I/O memory are reset for - * system memory. Pointers are cleared to NULL. This is the default. 
- */ -static inline void dma_buf_map_clear(struct dma_buf_map *map) -{ - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } -} - -/** - * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping - * @dst: The dma-buf mapping structure - * @src: The source buffer - * @len: The number of byte in src - * - * Copies data into a dma-buf mapping. The source buffer is in system - * memory. Depending on the buffer's location, the helper picks the correct - * method of accessing the memory. - */ -static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) -{ - if (dst->is_iomem) - memcpy_toio(dst->vaddr_iomem, src, len); - else - memcpy(dst->vaddr, src, len); -} - -/** - * dma_buf_map_incr - Increments the address stored in a dma-buf mapping - * @map: The dma-buf mapping structure - * @incr: The number of bytes to increment - * - * Increments the address stored in a dma-buf mapping. Depending on the - * buffer's location, the correct value will be updated. - */ -static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) -{ - if (map->is_iomem) - map->vaddr_iomem += incr; - else - map->vaddr += incr; -} - -#endif /* __DMA_BUF_MAP_H__ */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7ab50076e7a6..2097760e8e95 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -13,7 +13,7 @@ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ -#include +#include #include #include #include @@ -283,8 +283,8 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); - int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); - void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); + int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); + void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; /** @@ -347,7 +347,7 @@ struct dma_buf { * @vmap_ptr: * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. */ - struct dma_buf_map vmap_ptr; + struct iosys_map vmap_ptr; /** * @exp_name: @@ -628,6 +628,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); -int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); -void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); +void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h new file mode 100644 index 000000000000..f4186f91caa6 --- /dev/null +++ b/include/linux/iosys-map.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer abstraction for IO/system memory + */ + +#ifndef __IOSYS_MAP_H__ +#define __IOSYS_MAP_H__ + +#include +#include + +/** + * DOC: overview + * + * When accessing a memory region, depending on its location, users may have to + * access it with I/O operations or memory load/store operations. For example, + * copying to system memory could be done with memcpy(), copying to I/O memory + * would be done with memcpy_toio(). + * + * .. 
code-block:: c + * + * void *vaddr = ...; // pointer to system memory + * memcpy(vaddr, src, len); + * + * void *vaddr_iomem = ...; // pointer to I/O memory + * memcpy_toio(vaddr, _iomem, src, len); + * + * The user of such pointer may not have information about the mapping of that + * region or may want to have a single code path to handle operations on that + * buffer, regardless if it's located in system or IO memory. The type + * :c:type:`struct iosys_map ` and its helpers abstract that so the + * buffer can be passed around to other drivers or have separate duties inside + * the same driver for allocation, read and write operations. + * + * Open-coding access to :c:type:`struct iosys_map ` is considered + * bad style. Rather then accessing its fields directly, use one of the provided + * helper functions, or implement your own. For example, instances of + * :c:type:`struct iosys_map ` can be initialized statically with + * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These + * helpers will set an address in system memory. + * + * .. code-block:: c + * + * struct iosys_map map = IOSYS_MAP_INIT_VADDR(0xdeadbeaf); + * + * iosys_map_set_vaddr(&map, 0xdeadbeaf); + * + * To set an address in I/O memory, use iosys_map_set_vaddr_iomem(). + * + * .. code-block:: c + * + * iosys_map_set_vaddr_iomem(&map, 0xdeadbeaf); + * + * Instances of struct iosys_map do not have to be cleaned up, but + * can be cleared to NULL with iosys_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * iosys_map_clear(&map); + * + * Test if a mapping is valid with either iosys_map_is_set() or + * iosys_map_is_null(). + * + * .. code-block:: c + * + * if (iosys_map_is_set(&map) != iosys_map_is_null(&map)) + * // always true + * + * Instances of :c:type:`struct iosys_map ` can be compared for + * equality with iosys_map_is_equal(). Mappings that point to different memory + * spaces, system or I/O, are never equal. That's even true if both spaces are + * located in the same address space, both mappings contain the same address + * value, or both mappings refer to NULL. + * + * .. code-block:: c + * + * struct iosys_map sys_map; // refers to system memory + * struct iosys_map io_map; // refers to I/O memory + * + * if (iosys_map_is_equal(&sys_map, &io_map)) + * // always false + * + * A set up instance of struct iosys_map can be used to access or manipulate the + * buffer memory. Depending on the location of the memory, the provided helpers + * will pick the correct operations. Data can be copied into the memory with + * iosys_map_memcpy_to(). The address can be manipulated with iosys_map_incr(). + * + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * iosys_map_memcpy_to(&map, src, len); + * iosys_map_incr(&map, len); // go to first byte after the memcpy + */ + +/** + * struct iosys_map - Pointer to IO/system memory + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the buffer is located in I/O memory, or false + * otherwise. 
+ */ +struct iosys_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/** + * IOSYS_MAP_INIT_VADDR - Initializes struct iosys_map to an address in system memory + * @vaddr_: A system-memory address + */ +#define IOSYS_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + +/** + * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory + * @map: The iosys_map structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr(struct iosys_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + +/** + * iosys_map_set_vaddr_iomem - Sets a iosys mapping structure to an address in I/O memory + * @map: The iosys_map structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr_iomem(struct iosys_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + +/** + * iosys_map_is_equal - Compares two iosys mapping structures for equality + * @lhs: The iosys_map structure + * @rhs: A iosys_map structure to compare with + * + * Two iosys mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True is both structures are equal, or false otherwise. + */ +static inline bool iosys_map_is_equal(const struct iosys_map *lhs, + const struct iosys_map *rhs) +{ + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; +} + +/** + * iosys_map_is_null - Tests for a iosys mapping to be NULL + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool iosys_map_is_null(const struct iosys_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * iosys_map_is_set - Tests if the iosys mapping has been set + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping is been set, or false otherwise. + */ +static inline bool iosys_map_is_set(const struct iosys_map *map) +{ + return !iosys_map_is_null(map); +} + +/** + * iosys_map_clear - Clears a iosys mapping structure + * @map: The iosys_map structure + * + * Clears all fields to zero, including struct iosys_map.is_iomem, so + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void iosys_map_clear(struct iosys_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +/** + * iosys_map_memcpy_to - Memcpy into iosys mapping + * @dst: The iosys_map structure + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a iosys mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. 
+ */ +static inline void iosys_map_memcpy_to(struct iosys_map *dst, const void *src, + size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * iosys_map_incr - Increments the address stored in a iosys mapping + * @map: The iosys_map structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a iosys mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void iosys_map_incr(struct iosys_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + +#endif /* __IOSYS_MAP_H__ */ -- cgit v1.2.3 From 3486bedd99196ecdfe99c0ab5b67ad3c47e8a8fa Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 4 Feb 2022 10:57:35 -0800 Subject: bpf: Use bytes instead of pages for bpf_jit_[charge|uncharge]_modmem This enables sub-page memory charge and allocation. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220204185742.271030-3-song@kernel.org --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6eb0b180d33b..366f88afd56b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -846,8 +846,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); -int bpf_jit_charge_modmem(u32 pages); -void bpf_jit_uncharge_modmem(u32 pages); +int bpf_jit_charge_modmem(u32 size); +void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, -- cgit v1.2.3 From ed2d9e1a26cca963ff5ed3b76326d70f7d8201a9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 4 Feb 2022 10:57:36 -0800 Subject: bpf: Use size instead of pages in bpf_binary_header This is necessary to charge sub page memory for the BPF program. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220204185742.271030-4-song@kernel.org --- include/linux/filter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index d23e999dc032..5855eb474c62 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,7 +548,7 @@ struct sock_fprog_kern { #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { - u32 pages; + u32 size; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; @@ -886,8 +886,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->pages); - set_memory_x((unsigned long)hdr, hdr->pages); + set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } static inline struct bpf_binary_header * -- cgit v1.2.3 From ebc1415d9b4f043cef5a1fb002ec316e32167e7a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 4 Feb 2022 10:57:39 -0800 Subject: bpf: Introduce bpf_arch_text_copy This will be used to copy JITed text to RO protected module memory. On x86, bpf_arch_text_copy is implemented with text_poke_copy. bpf_arch_text_copy returns pointer to dst on success, and ERR_PTR(errno) on errors. 
Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220204185742.271030-7-song@kernel.org --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 366f88afd56b..ea0d7fd4a410 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2362,6 +2362,8 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void *bpf_arch_text_copy(void *dst, void *src, size_t len); + struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); -- cgit v1.2.3 From 33c9805860e584b194199cab1a1e81f4e6395408 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 4 Feb 2022 10:57:41 -0800 Subject: bpf: Introduce bpf_jit_binary_pack_[alloc|finalize|free] This is the jit binary allocator built on top of bpf_prog_pack. bpf_prog_pack allocates RO memory, which cannot be used directly by the JIT engine. Therefore, a temporary rw buffer is allocated for the JIT engine. Once JIT is done, bpf_jit_binary_pack_finalize is used to copy the program to the RO memory. bpf_jit_binary_pack_alloc reserves 16 bytes of extra space for illegal instructions, which is small than the 128 bytes space reserved by bpf_jit_binary_alloc. This change is necessary for bpf_jit_binary_hdr to find the correct header. Also, flag use_bpf_prog_pack is added to differentiate a program allocated by bpf_jit_binary_pack_alloc. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220204185742.271030-9-song@kernel.org --- include/linux/bpf.h | 1 + include/linux/filter.h | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ea0d7fd4a410..2fc7e5c5ef41 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -953,6 +953,7 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool use_bpf_prog_pack; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5855eb474c62..1cb1af917617 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -890,15 +890,6 @@ static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } -static inline struct bpf_binary_header * -bpf_jit_binary_hdr(const struct bpf_prog *fp) -{ - unsigned long real_start = (unsigned long)fp->bpf_func; - unsigned long addr = real_start & PAGE_MASK; - - return (void *)addr; -} - int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { @@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, + unsigned int alignment, + struct bpf_binary_header **rw_hdr, + u8 **rw_image, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, + struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); +void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); + int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct 
bpf_jit_poke_descriptor *poke); -- cgit v1.2.3 From a410a0cf98854a698a519bfbeb604145da384c0e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 4 Feb 2022 14:58:11 +0100 Subject: ipv6: Define dscp_t and stop taking ECN bits into account in fib6-rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a dscp_t type and its appropriate helpers that ensure ECN bits are not taken into account when handling DSCP. Use this new type to replace the tclass field of struct fib6_rule, so that fib6-rules don't get influenced by ECN bits anymore. Before this patch, fib6-rules didn't make any distinction between the DSCP and ECN bits. Therefore, rules specifying a DSCP (tos or dsfield options in iproute2) stopped working as soon a packets had at least one of its ECN bits set (as a work around one could create four rules for each DSCP value to match, one for each possible ECN value). After this patch fib6-rules only compare the DSCP bits. ECN doesn't influence the result anymore. Also, fib6-rules now must have the ECN bits cleared or they will be rejected. Signed-off-by: Guillaume Nault Acked-by: David Ahern Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Jakub Kicinski --- include/net/inet_dscp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ include/net/ipv6.h | 6 ++++++ 2 files changed, 63 insertions(+) create mode 100644 include/net/inet_dscp.h (limited to 'include') diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h new file mode 100644 index 000000000000..72f250dffada --- /dev/null +++ b/include/net/inet_dscp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP) + * + * DSCP is defined in RFC 2474: + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | DSCP | CU | + * +---+---+---+---+---+---+---+---+ + * + * DSCP: differentiated services codepoint + * CU: currently unused + * + * The whole DSCP + CU bits form the DS field. + * The DS field is also commonly called TOS or Traffic Class (for IPv6). + * + * Note: the CU bits are now used for Explicit Congestion Notification + * (RFC 3168). + */ + +#ifndef _INET_DSCP_H +#define _INET_DSCP_H + +#include + +/* Special type for storing DSCP values. + * + * A dscp_t variable stores a DS field with the CU (ECN) bits cleared. + * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding + * bugs where ECN bits are erroneously taken into account during FIB lookups + * or policy routing. + * + * Note: to get the real DSCP value contained in a dscp_t variable one would + * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have + * a helper for that, but there's currently no users. 
+ */ +typedef u8 __bitwise dscp_t; + +#define INET_DSCP_MASK 0xfc + +static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) +{ + return (__force dscp_t)(dsfield & INET_DSCP_MASK); +} + +static inline __u8 inet_dscp_to_dsfield(dscp_t dscp) +{ + return (__force __u8)dscp; +} + +static inline bool inet_validate_dscp(__u8 val) +{ + return !(val & ~INET_DSCP_MASK); +} + +#endif /* _INET_DSCP_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cda1f205f391..f693784e1419 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -974,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo) return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } +static inline dscp_t ip6_dscp(__be32 flowinfo) +{ + return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); +} + static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; -- cgit v1.2.3 From f55fbb6afb8d701e3185e31e73f5ea9503a66744 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 4 Feb 2022 14:58:16 +0100 Subject: ipv4: Reject routes specifying ECN bits in rtm_tos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new dscp_t type to replace the fc_tos field of fib_config, to ensure IPv4 routes aren't influenced by ECN bits when configured with non-zero rtm_tos. Before this patch, IPv4 routes specifying an rtm_tos with some of the ECN bits set were accepted. However they wouldn't work (never match) as IPv4 normally clears the ECN bits with IPTOS_RT_MASK before doing a FIB lookup (although a few buggy code paths don't). After this patch, IPv4 routes specifying an rtm_tos with any ECN bit set is rejected. Note: IPv6 routes ignore rtm_tos altogether, any rtm_tos is accepted, but treated as if it were 0. Signed-off-by: Guillaume Nault Acked-by: David Ahern Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c4297704bbcb..6a82bcb8813b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ struct fib_config { u8 fc_dst_len; - u8 fc_tos; + dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; -- cgit v1.2.3 From b84b6ec0f9767c82d0cfe5b2ac2c41de6b3d37e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Feb 2022 21:29:25 +0100 Subject: scsi: core: Add scsi_done_direct() for immediate completion Add scsi_done_direct() which behaves like scsi_done() except that it invokes blk_mq_complete_request_direct() in order to complete the request. Callers from process context can complete the request directly instead waking ksoftirqd. Link: https://lore.kernel.org/r/Yfw7JaszshmfYa1d@flow Reviewed-by: Bart Van Assche Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_cmnd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 6794d7322cbd..ff1c4b51f7ae 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -168,6 +168,7 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) } void scsi_done(struct scsi_cmnd *cmd); +void scsi_done_direct(struct scsi_cmnd *cmd); extern void scsi_finish_command(struct scsi_cmnd *cmd); -- cgit v1.2.3 From 5913eb45d036288babb67e97b210233537ef7b90 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Mon, 24 Jan 2022 22:10:04 +1000 Subject: mfd: simple-mfd-i2c: Enable support for the silergy,sy7636a Signed-off-by: Alistair Francis Signed-off-by: Lee Jones --- include/linux/mfd/sy7636a.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/mfd/sy7636a.h (limited to 'include') diff --git a/include/linux/mfd/sy7636a.h b/include/linux/mfd/sy7636a.h new file mode 100644 index 000000000000..22f03b2f851e --- /dev/null +++ b/include/linux/mfd/sy7636a.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Functions to access SY3686A power management chip. + * + * Copyright (C) 2021 reMarkable AS - http://www.remarkable.com/ + */ + +#ifndef __MFD_SY7636A_H +#define __MFD_SY7636A_H + +#define SY7636A_REG_OPERATION_MODE_CRL 0x00 +/* It is set if a gpio is used to control the regulator */ +#define SY7636A_OPERATION_MODE_CRL_VCOMCTL BIT(6) +#define SY7636A_OPERATION_MODE_CRL_ONOFF BIT(7) +#define SY7636A_REG_VCOM_ADJUST_CTRL_L 0x01 +#define SY7636A_REG_VCOM_ADJUST_CTRL_H 0x02 +#define SY7636A_REG_VCOM_ADJUST_CTRL_MASK 0x01ff +#define SY7636A_REG_VLDO_VOLTAGE_ADJULST_CTRL 0x03 +#define SY7636A_REG_POWER_ON_DELAY_TIME 0x06 +#define SY7636A_REG_FAULT_FLAG 0x07 +#define SY7636A_FAULT_FLAG_PG BIT(0) +#define SY7636A_REG_TERMISTOR_READOUT 0x08 + +#define SY7636A_REG_MAX 0x08 + +#define VCOM_ADJUST_CTRL_MASK 0x1ff +// Used to shift the high byte +#define VCOM_ADJUST_CTRL_SHIFT 8 +// Used to scale from VCOM_ADJUST_CTRL to mv +#define VCOM_ADJUST_CTRL_SCAL 10000 + +#define FAULT_FLAG_SHIFT 1 + +#endif /* __LINUX_MFD_SY7636A_H */ -- cgit v1.2.3 From fac608138c6136126faadafa5554cc0bbabf3c44 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:18 -0800 Subject: VMCI: dma dg: whitespace formatting change for vmci register defines Update formatting of existing register defines in preparation for adding additional register definitions for the VMCI device. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-2-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index e36cb114c188..9911ecfc18ba 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -12,15 +12,15 @@ #include /* Register offsets. 
*/ -#define VMCI_STATUS_ADDR 0x00 -#define VMCI_CONTROL_ADDR 0x04 -#define VMCI_ICR_ADDR 0x08 -#define VMCI_IMR_ADDR 0x0c -#define VMCI_DATA_OUT_ADDR 0x10 -#define VMCI_DATA_IN_ADDR 0x14 -#define VMCI_CAPS_ADDR 0x18 -#define VMCI_RESULT_LOW_ADDR 0x1c -#define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_STATUS_ADDR 0x00 +#define VMCI_CONTROL_ADDR 0x04 +#define VMCI_ICR_ADDR 0x08 +#define VMCI_IMR_ADDR 0x0c +#define VMCI_DATA_OUT_ADDR 0x10 +#define VMCI_DATA_IN_ADDR 0x14 +#define VMCI_CAPS_ADDR 0x18 +#define VMCI_RESULT_LOW_ADDR 0x1c +#define VMCI_RESULT_HIGH_ADDR 0x20 /* Max number of devices. */ #define VMCI_MAX_DEVICES 1 -- cgit v1.2.3 From e283a0e8b7ea83915e988ed059384af166b444c0 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:19 -0800 Subject: VMCI: dma dg: add MMIO access to registers Detect the support for MMIO access through examination of the length of the region requested in BAR1. If it is 256KB, the VMCI device supports MMIO access to registers. If MMIO access is supported, map the area of the region used for MMIO access (64KB size at offset 128KB). Add wrapper functions for accessing 32 bit register accesses through either MMIO or IO ports based on device configuration. Sending and receiving datagrams through iowrite8_rep/ioread8_rep is left unchanged for now, and will be addressed in a later change. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-3-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 9911ecfc18ba..8fc00e2685cf 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -82,6 +82,18 @@ enum { */ #define VMCI_MAX_PINNED_QP_MEMORY ((size_t)(32 * 1024)) +/* + * The version of the VMCI device that supports MMIO access to registers + * requests 256KB for BAR1 whereas the version of VMCI that supports + * MSI/MSI-X only requests 8KB. The layout of the larger 256KB region is: + * - the first 128KB are used for MSI/MSI-X. + * - the following 64KB are used for MMIO register access. + * - the remaining 64KB are unused. + */ +#define VMCI_WITH_MMIO_ACCESS_BAR_SIZE ((size_t)(256 * 1024)) +#define VMCI_MMIO_ACCESS_OFFSET ((size_t)(128 * 1024)) +#define VMCI_MMIO_ACCESS_SIZE ((size_t)(64 * 1024)) + /* * We have a fixed set of resource IDs available in the VMX. * This allows us to have a very simple implementation since we statically -- cgit v1.2.3 From eed2298d936087a1c85e0fa6f7170028e4f4fded Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:20 -0800 Subject: VMCI: dma dg: detect DMA datagram capability Detect the VMCI DMA datagram capability, and if present, ack it to the device. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-4-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 8fc00e2685cf..1ce2cffdc3ae 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -39,6 +39,7 @@ #define VMCI_CAPS_DATAGRAM BIT(2) #define VMCI_CAPS_NOTIFICATIONS BIT(3) #define VMCI_CAPS_PPN64 BIT(4) +#define VMCI_CAPS_DMA_DATAGRAM BIT(5) /* Interrupt Cause register bits. 
*/ #define VMCI_ICR_DATAGRAM BIT(0) -- cgit v1.2.3 From 8cb520bea1470ca205980fbf030ed1f472f4af2f Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:21 -0800 Subject: VMCI: dma dg: set OS page size Tell the device the page size used by the OS. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-5-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 1ce2cffdc3ae..4167779469fd 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -21,6 +21,7 @@ #define VMCI_CAPS_ADDR 0x18 #define VMCI_RESULT_LOW_ADDR 0x1c #define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_GUEST_PAGE_SHIFT 0x34 /* Max number of devices. */ #define VMCI_MAX_DEVICES 1 -- cgit v1.2.3 From cc68f2177fcbfe2dbe5e9514789b96ba5995ec1e Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:22 -0800 Subject: VMCI: dma dg: register dummy IRQ handlers for DMA datagrams Register dummy interrupt handlers for DMA datagrams in preparation for DMA datagram receive operations. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-6-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 4167779469fd..2b70c024dacb 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -45,13 +45,22 @@ /* Interrupt Cause register bits. */ #define VMCI_ICR_DATAGRAM BIT(0) #define VMCI_ICR_NOTIFICATION BIT(1) +#define VMCI_ICR_DMA_DATAGRAM BIT(2) /* Interrupt Mask register bits. */ #define VMCI_IMR_DATAGRAM BIT(0) #define VMCI_IMR_NOTIFICATION BIT(1) +#define VMCI_IMR_DMA_DATAGRAM BIT(2) -/* Maximum MSI/MSI-X interrupt vectors in the device. */ -#define VMCI_MAX_INTRS 2 +/* + * Maximum MSI/MSI-X interrupt vectors in the device. + * If VMCI_CAPS_DMA_DATAGRAM is supported by the device, + * VMCI_MAX_INTRS_DMA_DATAGRAM vectors are available, + * otherwise only VMCI_MAX_INTRS_NOTIFICATION. + */ +#define VMCI_MAX_INTRS_NOTIFICATION 2 +#define VMCI_MAX_INTRS_DMA_DATAGRAM 3 +#define VMCI_MAX_INTRS VMCI_MAX_INTRS_DMA_DATAGRAM /* * Supported interrupt vectors. There is one for each ICR value above, @@ -60,6 +69,7 @@ enum { VMCI_INTR_DATAGRAM = 0, VMCI_INTR_NOTIFICATION = 1, + VMCI_INTR_DMA_DATAGRAM = 2, }; /* -- cgit v1.2.3 From 5ee109828e73bbe4213c373988608d8f33e03d78 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:23 -0800 Subject: VMCI: dma dg: allocate send and receive buffers for DMA datagrams If DMA datagrams are used, allocate send and receive buffers in coherent DMA memory. This is done in preparation for the send and receive datagram operations, where the buffers are used for the exchange of data between driver and device. 
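A rough sketch of the kind of allocation described above, using the standard
coherent DMA API; the buffer size, function name and parameters are assumptions
for illustration and not the actual driver code:

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>

	#define VMCI_DMA_DG_BUF_SIZE	PAGE_SIZE	/* size is an assumption */

	static int vmci_alloc_dg_buffers(struct pci_dev *pdev,
					 void **tx_buf, dma_addr_t *tx_dma,
					 void **rx_buf, dma_addr_t *rx_dma)
	{
		*tx_buf = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUF_SIZE,
					     tx_dma, GFP_KERNEL);
		if (!*tx_buf)
			return -ENOMEM;

		*rx_buf = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUF_SIZE,
					     rx_dma, GFP_KERNEL);
		if (!*rx_buf) {
			dma_free_coherent(&pdev->dev, VMCI_DMA_DG_BUF_SIZE,
					  *tx_buf, *tx_dma);
			return -ENOMEM;
		}

		return 0;
	}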
Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-7-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 2b70c024dacb..8bc37d8244a8 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -21,6 +21,10 @@ #define VMCI_CAPS_ADDR 0x18 #define VMCI_RESULT_LOW_ADDR 0x1c #define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_DATA_OUT_LOW_ADDR 0x24 +#define VMCI_DATA_OUT_HIGH_ADDR 0x28 +#define VMCI_DATA_IN_LOW_ADDR 0x2c +#define VMCI_DATA_IN_HIGH_ADDR 0x30 #define VMCI_GUEST_PAGE_SHIFT 0x34 /* Max number of devices. */ -- cgit v1.2.3 From 22aa5c7f323022477b70e044eb00e6bfea9498e8 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 7 Feb 2022 02:27:24 -0800 Subject: VMCI: dma dg: add support for DMA datagrams sends Use DMA based send operation from the transmit buffer instead of the iowrite8_rep based datagram send when DMA datagrams are supported. The outgoing datagram is sent as inline data in the VMCI transmit buffer. Once the header has been configured, the send is initiated by writing the lower 32 bit of the buffer base address to the VMCI_DATA_OUT_LOW_ADDR register. Only then will the device process the header and the datagram itself. Following that, the driver busy waits (it isn't possible to sleep on the send path) for the header busy flag to change - indicating that the send is complete. Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/20220207102725.2742-8-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 8bc37d8244a8..6fb663b36f72 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -110,6 +110,40 @@ enum { #define VMCI_MMIO_ACCESS_OFFSET ((size_t)(128 * 1024)) #define VMCI_MMIO_ACCESS_SIZE ((size_t)(64 * 1024)) +/* + * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the + * sending and receiving of datagrams can be performed using DMA to/from + * a driver allocated buffer. + * Sending and receiving will be handled as follows: + * - when sending datagrams, the driver initializes the buffer where the + * data part will refer to the outgoing VMCI datagram, sets the busy flag + * to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR + * and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers + * the device processing of the buffer. When the device has processed the + * buffer, it will write the result value to the buffer and then clear the + * busy flag. + * - when receiving datagrams, the driver initializes the buffer where the + * data part will describe the receive buffer, clears the busy flag and + * writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and + * VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the + * device processing of the buffer. The device will copy as many available + * datagrams into the buffer as possible, and then sets the busy flag. + * When the busy flag is set, the driver will process the datagrams in the + * buffer. 
+ */ +struct vmci_data_in_out_header { + uint32_t busy; + uint32_t opcode; + uint32_t size; + uint32_t rsvd; + uint64_t result; +}; + +struct vmci_sg_elem { + uint64_t addr; + uint64_t size; +}; + /* * We have a fixed set of resource IDs available in the VMX. * This allows us to have a very simple implementation since we statically -- cgit v1.2.3 From 2f04aa69ab5c5c40d2e3e51fd73ce2ecb651e9ba Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 4 Feb 2022 16:52:40 +0100 Subject: regulator: Add bindings for TPS62864x Add bindings for the TPS62864/TPS6286/TPS62868/TPS62869 voltage regulators. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220204155241.576342-2-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- include/dt-bindings/regulator/ti,tps62864.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/dt-bindings/regulator/ti,tps62864.h (limited to 'include') diff --git a/include/dt-bindings/regulator/ti,tps62864.h b/include/dt-bindings/regulator/ti,tps62864.h new file mode 100644 index 000000000000..8db31f23d956 --- /dev/null +++ b/include/dt-bindings/regulator/ti,tps62864.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_REGULATOR_TI_TPS62864_H +#define _DT_BINDINGS_REGULATOR_TI_TPS62864_H + +#define TPS62864_MODE_NORMAL 0 +#define TPS62864_MODE_FPWM 1 + +#endif -- cgit v1.2.3 From 3301bc53358a0eb0a0db65fd7a513cd4cb50c83a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Jan 2022 15:29:45 +0800 Subject: lib/sbitmap: kill 'depth' from sbitmap_word Only the last sbitmap_word can have different depth, and all the others must have same depth of 1U << sb->shift, so not necessary to store it in sbitmap_word, and it can be retrieved easily and efficiently by adding one internal helper of __map_depth(sb, index). Remove 'depth' field from sbitmap_word, then the annotation of ____cacheline_aligned_in_smp for 'word' isn't needed any more. Not see performance effect when running high parallel IOPS test on null_blk. This way saves us one cacheline(usually 64 words) per each sbitmap_word. Cc: Martin Wilck Signed-off-by: Ming Lei Reviewed-by: Martin Wilck Reviewed-by: John Garry Link: https://lore.kernel.org/r/20220110072945.347535-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 95df357ec009..df3b584b0f0c 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -27,15 +27,10 @@ struct seq_file; * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { - /** - * @depth: Number of bits being used in @word/@cleared - */ - unsigned long depth; - /** * @word: word holding free bits */ - unsigned long word ____cacheline_aligned_in_smp; + unsigned long word; /** * @cleared: word holding cleared bits @@ -164,6 +159,14 @@ struct sbitmap_queue { int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); +/* sbitmap internal helper */ +static inline unsigned int __map_depth(const struct sbitmap *sb, int index) +{ + if (index == sb->map_nr - 1) + return sb->depth - (index << sb->shift); + return 1U << sb->shift; +} + /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. 
@@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, - sb->map[index].depth - nr, + __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; -- cgit v1.2.3 From 3f607293b74d6acb06571a774a500143c1f0ed2c Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 8 Feb 2022 20:07:04 +0800 Subject: sbitmap: Delete old sbitmap_queue_get_shallow() Since __sbitmap_queue_get_shallow() was introduced in commit c05e66733788 ("sbitmap: add sbitmap_get_shallow() operation"), it has not been used. Delete __sbitmap_queue_get_shallow() and rename public __sbitmap_queue_get_shallow() -> sbitmap_queue_get_shallow() as it is odd to have public __foo but no foo at all. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1644322024-105340-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index df3b584b0f0c..dffeb8281c2d 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -135,7 +135,7 @@ struct sbitmap_queue { /** * @min_shallow_depth: The minimum shallow depth which may be passed to - * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; }; @@ -463,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** - * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. @@ -475,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth); +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct @@ -498,32 +498,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, return nr; } -/** - * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct - * sbitmap_queue, limiting the depth used from each word. - * @sbq: Bitmap queue to allocate from. - * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to - * sbitmap_queue_clear()). - * @shallow_depth: The maximum number of bits to allocate from a single word. - * See sbitmap_get_shallow(). - * - * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after - * initializing @sbq. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int *cpu, - unsigned int shallow_depth) -{ - int nr; - - *cpu = get_cpu(); - nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); - put_cpu(); - return nr; -} - /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. 
-- cgit v1.2.3 From dfefa04a90cf9a20090cfa096153d64f95b7e33f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 8 Feb 2022 01:27:05 +0000 Subject: KVM: arm64: Drop unused param from kvm_psci_version() kvm_psci_version() consumes a pointer to struct kvm in addition to a vcpu pointer. Drop the kvm pointer as it is unused. While the comment suggests the explicit kvm pointer was useful for calling from hyp, there exist no such callsite in hyp. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220208012705.640444-1-oupton@google.com --- include/kvm/arm_psci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 5b58bd2fe088..297645edcaff 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -16,11 +16,7 @@ #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 -/* - * We need the KVM pointer independently from the vcpu as we can call - * this from HYP, and need to apply kern_hyp_va on it... - */ -static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) +static inline int kvm_psci_version(struct kvm_vcpu *vcpu) { /* * Our PSCI implementation stays the same across versions from -- cgit v1.2.3 From 2093057ab879da1070c851b9e07126eaa86d0dfc Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:55 +0000 Subject: perf: Fix wrong name in comment for struct perf_cpu_context Commit 0793a61d4df8 ("performance counters: core code") added the perf subsystem (then called Performance Counters) to Linux, creating the struct perf_cpu_context. The comment for the struct referred to it as a "struct perf_counter_cpu_context". Commit cdd6c482c9ff ("perf: Do the big rename: Performance Counters -> Performance Events") changed the comment to refer to a "struct perf_event_cpu_context", which was still the wrong name for the struct. Change the comment to say "struct perf_cpu_context". CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-3-alexandru.elisei@arm.com --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 733649184b27..af97dd427501 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -864,7 +864,7 @@ struct perf_event_context { #define PERF_NR_CONTEXTS 4 /** - * struct perf_event_cpu_context - per cpu event context structure + * struct perf_cpu_context - per cpu event context structure */ struct perf_cpu_context { struct perf_event_context ctx; -- cgit v1.2.3 From db858060b1a788fba03711793dcaff19ea43286c Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:57 +0000 Subject: KVM: arm64: Keep a list of probed PMUs The ARM PMU driver calls kvm_host_pmu_init() after probing to tell KVM that a hardware PMU is available for guest emulation. Heterogeneous systems can have more than one PMU present, and the callback gets called multiple times, once for each of them. Keep track of all the PMUs available to KVM, as they're going to be needed later. 
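A sketch of what the bookkeeping could look like on the KVM side; the list
head, lock and error handling are assumptions for illustration, and only
struct arm_pmu_entry and the kvm_host_pmu_init() callback name come from this
series:

	#include <linux/list.h>
	#include <linux/slab.h>
	#include <kvm/arm_pmu.h>

	/* Hypothetical list/lock; only struct arm_pmu_entry is from this patch. */
	static LIST_HEAD(arm_pmus);
	static DEFINE_MUTEX(arm_pmus_lock);

	void kvm_host_pmu_init(struct arm_pmu *pmu)
	{
		struct arm_pmu_entry *entry;

		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			return;

		entry->arm_pmu = pmu;

		mutex_lock(&arm_pmus_lock);
		list_add_tail(&entry->entry, &arm_pmus);
		mutex_unlock(&arm_pmus_lock);
	}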
Reviewed-by: Reiji Watanabe Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-5-alexandru.elisei@arm.com --- include/kvm/arm_pmu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f9ed4c171d7b..20193416d214 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -29,6 +29,11 @@ struct kvm_pmu { struct irq_work overflow_work; }; +struct arm_pmu_entry { + struct list_head entry; + struct arm_pmu *arm_pmu; +}; + DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static __always_inline bool kvm_arm_support_pmu_v3(void) -- cgit v1.2.3 From 0cabb47af3cfaeb6007ba3868379bbd4daee64cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Dec 2021 16:25:20 -0800 Subject: rcu: Refactor rcu_barrier() empty-list handling This commit saves a few lines by checking first for an empty callback list. If the callback list is empty, then that CPU is taken care of, regardless of its online or nocb state. Also simplify tracing accordingly and fold a few lines together. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 670e41783edd..90b2fb0292cb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read, * Tracepoint for rcu_barrier() execution. The string "s" describes * the rcu_barrier phase: * "Begin": rcu_barrier() started. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "EarlyExit": rcu_barrier() piggybacked, thus early exit. * "Inc1": rcu_barrier() piggyback check counter incremented. - * "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks. - * "OnlineQ": rcu_barrier() found online CPU with callbacks. - * "OnlineNQ": rcu_barrier() found online CPU, no callbacks. + * "Inc2": rcu_barrier() piggyback check counter incremented. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "IRQNQ": An rcu_barrier_callback() callback found no callbacks. - * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. - * "Inc2": rcu_barrier() piggyback check counter incremented. + * "NQ": rcu_barrier() found a CPU with no callbacks. + * "OnlineQ": rcu_barrier() found online CPU with callbacks. * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ -- cgit v1.2.3 From c6c89783eba05a5e159b07cfd8c68d841cc5de42 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:36 -0800 Subject: fscrypt: add functions for direct I/O support Encrypted files traditionally haven't supported DIO, due to the need to encrypt/decrypt the data. However, when the encryption is implemented using inline encryption (blk-crypto) instead of the traditional filesystem-layer encryption, it is straightforward to support DIO. In preparation for supporting this, add the following functions: - fscrypt_dio_supported() checks whether a DIO request is supported as far as encryption is concerned. Encrypted files will only support DIO when inline encryption is used and the I/O request is properly aligned; this function checks these preconditions. 
- fscrypt_limit_io_blocks() limits the length of a bio to avoid crossing a place in the file that a bio with an encryption context cannot cross due to a DUN discontiguity. This function is needed by filesystems that use the iomap DIO implementation (which operates directly on logical ranges, so it won't use fscrypt_mergeable_bio()) and that support FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32. Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220128233940.79464-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 91ea9477e9bd..50d92d805bd8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -714,6 +714,10 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); + +u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); + #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) @@ -742,6 +746,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, { return true; } + +static inline bool fscrypt_dio_supported(struct kiocb *iocb, + struct iov_iter *iter) +{ + const struct inode *inode = file_inode(iocb->ki_filp); + + return !fscrypt_needs_contents_encryption(inode); +} + +static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, + u64 nr_blocks) +{ + return nr_blocks; +} #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** -- cgit v1.2.3 From b2309a71c1f2fc841feb184195b2e46b2e139bf4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Feb 2022 10:41:07 -0800 Subject: net: add dev->dev_registered_tracker Convert one dev_hold()/dev_put() pair in register_netdevice() and unregister_netdevice_many() to dev_hold_track() and dev_put_track(). This would allow to detect a rogue dev_put() a bit earlier. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220207184107.1401096-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3fb6fb67ed77..5f6e2c0b0c90 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1948,6 +1948,8 @@ enum netdev_ml_priv_type { * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. + * @dev_registered_tracker: tracker for reference held while + * registered * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2282,6 +2284,7 @@ struct net_device { u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; + netdevice_tracker dev_registered_tracker; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 21a216a8fc630161e69eaf02cbebdd1816ff1a13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Feb 2022 20:50:28 -0800 Subject: ipv6/addrconf: allocate a per netns hash table Add a per netns hash table and a dedicated spinlock, first step to get rid of the global inet6_addr_lst[] one. 
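A sketch of the per-namespace setup this makes possible, assuming the existing
IN6_ADDR_HSIZE hash size; the function name is illustrative and only the new
inet6_addr_lst and addrconf_hash_lock members come from this patch:

	static int addrconf_net_hash_init(struct net *net)
	{
		int i;

		net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
						   sizeof(struct hlist_head),
						   GFP_KERNEL);
		if (!net->ipv6.inet6_addr_lst)
			return -ENOMEM;

		for (i = 0; i < IN6_ADDR_HSIZE; i++)
			INIT_HLIST_HEAD(&net->ipv6.inet6_addr_lst[i]);

		spin_lock_init(&net->ipv6.addrconf_hash_lock);

		return 0;
	}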
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/netns/ipv6.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 30cdfc4e1615..755f12001c8b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -92,6 +92,10 @@ struct netns_ipv6 { struct sock *tcp_sk; struct sock *igmp_sk; struct sock *mc_autojoin_sk; + + struct hlist_head *inet6_addr_lst; + spinlock_t addrconf_hash_lock; + #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr_table *mrt6; -- cgit v1.2.3 From 8805d13ff1b2bef6a7bb8a005d2441763286dd7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Feb 2022 20:50:29 -0800 Subject: ipv6/addrconf: use one delayed work per netns Next step for using per netns inet6_addr_lst is to have per netns work item to ultimately call addrconf_verify_rtnl() and addrconf_verify() with a new 'struct net*' argument. Everything is still using the global inet6_addr_lst[] table. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 755f12001c8b..d145f1966682 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -95,6 +95,7 @@ struct netns_ipv6 { struct hlist_head *inet6_addr_lst; spinlock_t addrconf_hash_lock; + struct delayed_work addr_chk_work; #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -- cgit v1.2.3 From c7d9a6751a5fcebc87feee9b999b40078ed9fb43 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Tue, 8 Feb 2022 02:32:10 -0300 Subject: net: dsa: typo in comment Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220208053210.14831-1-luizluca@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ca8c14b547b4..fd1f62a6e0a8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1281,7 +1281,7 @@ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers - * @__ops_array: Array of tag driver strucutres + * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and -- cgit v1.2.3 From 6523d3b2ffa238ac033c34a726617061d6a744aa Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 8 Feb 2022 16:36:30 +0100 Subject: peci: Add core infrastructure Intel processors provide access for various services designed to support processor and DRAM thermal management, platform manageability and processor interface tuning and diagnostics. Those services are available via the Platform Environment Control Interface (PECI) that provides a communication channel between the processor and the Baseboard Management Controller (BMC) or other platform management device. This change introduces PECI subsystem by adding the initial core module and API for controller drivers. 
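A minimal sketch of how a controller driver would plug into this API; the
foo_* names are placeholders, struct peci_controller_ops and
devm_peci_controller_add() come from the new header, and the ERR_PTR() return
convention on failure is assumed here:

	#include <linux/peci.h>
	#include <linux/platform_device.h>

	static int foo_peci_xfer(struct peci_controller *controller, u8 addr,
				 struct peci_request *req)
	{
		/* Drive the hardware: send req->tx.buf (req->tx.len bytes),
		 * then fill req->rx.buf and req->rx.len with the response. */
		return 0;
	}

	static struct peci_controller_ops foo_peci_ops = {
		.xfer = foo_peci_xfer,
	};

	static int foo_peci_probe(struct platform_device *pdev)
	{
		struct peci_controller *controller;

		controller = devm_peci_controller_add(&pdev->dev, &foo_peci_ops);

		return PTR_ERR_OR_ZERO(controller);
	}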
Co-developed-by: Jason M Bills Co-developed-by: Jae Hyun Yoo Reviewed-by: Pierre-Louis Bossart Acked-by: Joel Stanley Signed-off-by: Jason M Bills Signed-off-by: Jae Hyun Yoo Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20220208153639.255278-5-iwona.winiarska@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/peci.h | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/linux/peci.h (limited to 'include') diff --git a/include/linux/peci.h b/include/linux/peci.h new file mode 100644 index 000000000000..26e0a4e73b50 --- /dev/null +++ b/include/linux/peci.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2018-2021 Intel Corporation */ + +#ifndef __LINUX_PECI_H +#define __LINUX_PECI_H + +#include +#include +#include +#include + +/* + * Currently we don't support any PECI command over 32 bytes. + */ +#define PECI_REQUEST_MAX_BUF_SIZE 32 + +struct peci_controller; +struct peci_request; + +/** + * struct peci_controller_ops - PECI controller specific methods + * @xfer: PECI transfer function + * + * PECI controllers may have different hardware interfaces - the drivers + * implementing PECI controllers can use this structure to abstract away those + * differences by exposing a common interface for PECI core. + */ +struct peci_controller_ops { + int (*xfer)(struct peci_controller *controller, u8 addr, struct peci_request *req); +}; + +/** + * struct peci_controller - PECI controller + * @dev: device object to register PECI controller to the device model + * @ops: pointer to device specific controller operations + * @bus_lock: lock used to protect multiple callers + * @id: PECI controller ID + * + * PECI controllers usually connect to their drivers using non-PECI bus, + * such as the platform bus. + * Each PECI controller can communicate with one or more PECI devices. + */ +struct peci_controller { + struct device dev; + struct peci_controller_ops *ops; + struct mutex bus_lock; /* held for the duration of xfer */ + u8 id; +}; + +struct peci_controller *devm_peci_controller_add(struct device *parent, + struct peci_controller_ops *ops); + +static inline struct peci_controller *to_peci_controller(void *d) +{ + return container_of(d, struct peci_controller, dev); +} + +/** + * struct peci_device - PECI device + * @dev: device object to register PECI device to the device model + * @controller: manages the bus segment hosting this PECI device + * @addr: address used on the PECI bus connected to the parent controller + * + * A peci_device identifies a single device (i.e. CPU) connected to a PECI bus. + * The behaviour exposed to the rest of the system is defined by the PECI driver + * managing the device. + */ +struct peci_device { + struct device dev; + u8 addr; +}; + +static inline struct peci_device *to_peci_device(struct device *d) +{ + return container_of(d, struct peci_device, dev); +} + +/** + * struct peci_request - PECI request + * @device: PECI device to which the request is sent + * @tx: TX buffer specific data + * @tx.buf: TX buffer + * @tx.len: transfer data length in bytes + * @rx: RX buffer specific data + * @rx.buf: RX buffer + * @rx.len: received data length in bytes + * + * A peci_request represents a request issued by PECI originator (TX) and + * a response received from PECI responder (RX). 
+ */ +struct peci_request { + struct peci_device *device; + struct { + u8 buf[PECI_REQUEST_MAX_BUF_SIZE]; + u8 len; + } rx, tx; +}; + +#endif /* __LINUX_PECI_H */ -- cgit v1.2.3 From 52857e6828e260b16ac569578705f83cf2a71ac1 Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 8 Feb 2022 16:36:32 +0100 Subject: peci: Add device detection Since PECI devices are discoverable, we can dynamically detect devices that are actually available in the system. This change complements the earlier implementation by rescanning PECI bus to detect available devices. For this purpose, it also introduces the minimal API for PECI requests. Reviewed-by: Pierre-Louis Bossart Acked-by: Joel Stanley Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20220208153639.255278-7-iwona.winiarska@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/peci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/peci.h b/include/linux/peci.h index 26e0a4e73b50..7e35673f3786 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -60,6 +60,7 @@ static inline struct peci_controller *to_peci_controller(void *d) * @dev: device object to register PECI device to the device model * @controller: manages the bus segment hosting this PECI device * @addr: address used on the PECI bus connected to the parent controller + * @deleted: indicates that PECI device was already deleted * * A peci_device identifies a single device (i.e. CPU) connected to a PECI bus. * The behaviour exposed to the rest of the system is defined by the PECI driver @@ -68,6 +69,7 @@ static inline struct peci_controller *to_peci_controller(void *d) struct peci_device { struct device dev; u8 addr; + bool deleted; }; static inline struct peci_device *to_peci_device(struct device *d) -- cgit v1.2.3 From 6b8145b054b27319dddaad4abbb5184e343375da Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 8 Feb 2022 16:36:34 +0100 Subject: peci: Add support for PECI device drivers Add support for PECI device drivers, which unlike PECI controller drivers are actually able to provide functionalities to userspace. Also, extend peci_request API to allow querying more details about PECI device (e.g. model/family), that's going to be used to find a compatible peci_driver. 
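A sketch of the kind of check this information enables in a device driver;
the probe signature and the dev_info() use are assumptions, and only
struct peci_device and its info fields come from this header change:

	/* Assumed probe signature for illustration. */
	static int foo_peci_device_probe(struct peci_device *device)
	{
		dev_info(&device->dev,
			 "family %#x model %#x PECI rev %#x socket %u\n",
			 device->info.family, device->info.model,
			 device->info.peci_revision, device->info.socket_id);

		return 0;
	}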
Reviewed-by: Pierre-Louis Bossart Acked-by: Joel Stanley Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20220208153639.255278-9-iwona.winiarska@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/peci.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/peci.h b/include/linux/peci.h index 7e35673f3786..4eda423ba10c 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -14,6 +14,14 @@ */ #define PECI_REQUEST_MAX_BUF_SIZE 32 +#define PECI_PCS_PKG_ID 0 /* Package Identifier Read */ +#define PECI_PKG_ID_CPU_ID 0x0000 /* CPUID Info */ +#define PECI_PKG_ID_PLATFORM_ID 0x0001 /* Platform ID */ +#define PECI_PKG_ID_DEVICE_ID 0x0002 /* Uncore Device ID */ +#define PECI_PKG_ID_MAX_THREAD_ID 0x0003 /* Max Thread ID */ +#define PECI_PKG_ID_MICROCODE_REV 0x0004 /* CPU Microcode Update Revision */ +#define PECI_PKG_ID_MCA_ERROR_LOG 0x0005 /* Machine Check Status */ + struct peci_controller; struct peci_request; @@ -59,6 +67,11 @@ static inline struct peci_controller *to_peci_controller(void *d) * struct peci_device - PECI device * @dev: device object to register PECI device to the device model * @controller: manages the bus segment hosting this PECI device + * @info: PECI device characteristics + * @info.family: device family + * @info.model: device model + * @info.peci_revision: PECI revision supported by the PECI device + * @info.socket_id: the socket ID represented by the PECI device * @addr: address used on the PECI bus connected to the parent controller * @deleted: indicates that PECI device was already deleted * @@ -68,6 +81,12 @@ static inline struct peci_controller *to_peci_controller(void *d) */ struct peci_device { struct device dev; + struct { + u16 family; + u8 model; + u8 peci_revision; + u8 socket_id; + } info; u8 addr; bool deleted; }; -- cgit v1.2.3 From 93e1821c80f9460c8931dc4bc090ede794f966cd Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 8 Feb 2022 16:36:35 +0100 Subject: peci: Add peci-cpu driver PECI is an interface that may be used by different types of devices. Add a peci-cpu driver compatible with Intel processors. The driver is responsible for handling auxiliary devices that can subsequently be used by other drivers (e.g. hwmons). 
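A sketch of how a consumer such as a hwmon driver might use the helpers
exported in <linux/peci-cpu.h>; the function name and the raw-value scaling
are assumptions for illustration, and only peci_temp_read() with its s16 raw
format comes from this patch:

	#include <linux/peci-cpu.h>

	static int foo_read_thermal_margin(struct peci_device *device, long *val_mc)
	{
		s16 temp_raw;
		int ret;

		ret = peci_temp_read(device, &temp_raw);
		if (ret)
			return ret;

		/* Scaling is an assumption: treat the raw value as a 1/64 degree
		 * fixed-point number and report millidegrees Celsius. */
		*val_mc = (long)temp_raw * 1000 / 64;

		return 0;
	}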
Reviewed-by: Pierre-Louis Bossart Acked-by: Joel Stanley Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20220208153639.255278-10-iwona.winiarska@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/peci-cpu.h | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/peci.h | 8 -------- 2 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 include/linux/peci-cpu.h (limited to 'include') diff --git a/include/linux/peci-cpu.h b/include/linux/peci-cpu.h new file mode 100644 index 000000000000..ff8ae9c26c80 --- /dev/null +++ b/include/linux/peci-cpu.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021 Intel Corporation */ + +#ifndef __LINUX_PECI_CPU_H +#define __LINUX_PECI_CPU_H + +#include + +#include "../../arch/x86/include/asm/intel-family.h" + +#define PECI_PCS_PKG_ID 0 /* Package Identifier Read */ +#define PECI_PKG_ID_CPU_ID 0x0000 /* CPUID Info */ +#define PECI_PKG_ID_PLATFORM_ID 0x0001 /* Platform ID */ +#define PECI_PKG_ID_DEVICE_ID 0x0002 /* Uncore Device ID */ +#define PECI_PKG_ID_MAX_THREAD_ID 0x0003 /* Max Thread ID */ +#define PECI_PKG_ID_MICROCODE_REV 0x0004 /* CPU Microcode Update Revision */ +#define PECI_PKG_ID_MCA_ERROR_LOG 0x0005 /* Machine Check Status */ +#define PECI_PCS_MODULE_TEMP 9 /* Per Core DTS Temperature Read */ +#define PECI_PCS_THERMAL_MARGIN 10 /* DTS thermal margin */ +#define PECI_PCS_DDR_DIMM_TEMP 14 /* DDR DIMM Temperature */ +#define PECI_PCS_TEMP_TARGET 16 /* Temperature Target Read */ +#define PECI_PCS_TDP_UNITS 30 /* Units for power/energy registers */ + +struct peci_device; + +int peci_temp_read(struct peci_device *device, s16 *temp_raw); + +int peci_pcs_read(struct peci_device *device, u8 index, + u16 param, u32 *data); + +int peci_pci_local_read(struct peci_device *device, u8 bus, u8 dev, + u8 func, u16 reg, u32 *data); + +int peci_ep_pci_local_read(struct peci_device *device, u8 seg, + u8 bus, u8 dev, u8 func, u16 reg, u32 *data); + +int peci_mmio_read(struct peci_device *device, u8 bar, u8 seg, + u8 bus, u8 dev, u8 func, u64 address, u32 *data); + +#endif /* __LINUX_PECI_CPU_H */ diff --git a/include/linux/peci.h b/include/linux/peci.h index 4eda423ba10c..06e6ef935297 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -14,14 +14,6 @@ */ #define PECI_REQUEST_MAX_BUF_SIZE 32 -#define PECI_PCS_PKG_ID 0 /* Package Identifier Read */ -#define PECI_PKG_ID_CPU_ID 0x0000 /* CPUID Info */ -#define PECI_PKG_ID_PLATFORM_ID 0x0001 /* Platform ID */ -#define PECI_PKG_ID_DEVICE_ID 0x0002 /* Uncore Device ID */ -#define PECI_PKG_ID_MAX_THREAD_ID 0x0003 /* Max Thread ID */ -#define PECI_PKG_ID_MICROCODE_REV 0x0004 /* CPU Microcode Update Revision */ -#define PECI_PKG_ID_MCA_ERROR_LOG 0x0005 /* Machine Check Status */ - struct peci_controller; struct peci_request; -- cgit v1.2.3 From 4f774c4a65bf3987d1a95c966e884f38c8a942af Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 27 Jan 2022 19:25:53 -0800 Subject: cpufreq: Reintroduce ready() callback This effectively revert '4bf8e582119e ("cpufreq: Remove ready() callback")', in order to reintroduce the ready callback. This is needed in order to be able to leave the thermal pressure interrupts in the Qualcomm CPUfreq driver disabled during initialization, so that it doesn't fire while related_cpus are still 0. 
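A sketch of the intended usage with placeholder foo_* names; only the
reintroduced .ready member of struct cpufreq_driver comes from this patch:

	#include <linux/cpufreq.h>

	/* Hypothetical helper standing in for driver-specific IRQ setup. */
	static void foo_enable_throttle_irq(struct cpufreq_policy *policy)
	{
	}

	static void foo_cpufreq_ready(struct cpufreq_policy *policy)
	{
		/* related_cpus is fully populated by the time this runs, so the
		 * interrupt can no longer fire against an empty cpumask. */
		foo_enable_throttle_irq(policy);
	}

	static struct cpufreq_driver foo_cpufreq_driver = {
		.name	= "foo-cpufreq",
		.ready	= foo_cpufreq_ready,
		/* .init, .verify, .target_index and friends omitted in this sketch */
	};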
Signed-off-by: Bjorn Andersson [ Viresh: Added the Chinese translation as well and updated commit msg ] Signed-off-by: Viresh Kumar --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078..3522a272b74d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -382,6 +382,9 @@ struct cpufreq_driver { int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); + /* Will be called after the driver is fully initialized */ + void (*ready)(struct cpufreq_policy *policy); + struct freq_attr **attr; /* platform specific boost support code */ -- cgit v1.2.3 From 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Feb 2022 16:14:32 +0200 Subject: xfrm: enforce validity of offload input flags struct xfrm_user_offload has flags variable that received user input, but kernel didn't check if valid bits were provided. It caused a situation where not sanitized input was forwarded directly to the drivers. For example, XFRM_OFFLOAD_IPV6 define that was exposed, was used by strongswan, but not implemented in the kernel at all. As a solution, check and sanitize input flags to forward XFRM_OFFLOAD_INBOUND to the drivers. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 4e29d7851890..65e13a099b1a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -511,6 +511,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 -- cgit v1.2.3 From 8cba323437a49a45756d661f500b324fc2d486fe Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 8 Feb 2022 09:52:13 +0100 Subject: mtd: rawnand: protect access to rawnand devices while in suspend Prevent rawnand access while in a suspended state. Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the rawnand layer to return errors rather than waiting in a blocking wait. Tested on a iMX6ULL. Fixes: 013e6292aaf5 ("mtd: rawnand: Simplify the locking") Signed-off-by: Sean Nyekjaer Reviewed-by: Boris Brezillon Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220208085213.1838273-1-sean@geanix.com --- include/linux/mtd/rawnand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5b88cd51fadb..dcf90144d70b 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1240,6 +1240,7 @@ struct nand_secure_region { * @lock: Lock protecting the suspended field. Also used to serialize accesses * to the NAND device * @suspended: Set to 1 when the device is suspended, 0 when it's not + * @resume_wq: wait queue to sleep if rawnand is in suspended state. * @cur_cs: Currently selected target. -1 means no target selected, otherwise we * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets(). 
* NAND Controller drivers should not modify this value, but they're @@ -1294,6 +1295,7 @@ struct nand_chip { /* Internals */ struct mutex lock; unsigned int suspended : 1; + wait_queue_head_t resume_wq; int cur_cs; int read_retries; struct nand_secure_region *secure_regions; -- cgit v1.2.3 From 5145abeb0649acf810a32e63bd762e617a9b3309 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 16 Dec 2021 12:16:41 +0100 Subject: mtd: nand: ecc: Provide a helper to retrieve a pilelined engine device In a pipelined engine situation, we might either have the host which internally has support for error correction, or have it using an external hardware block for this purpose. In the former case, the host is also the ECC engine. In the latter case, it is not. In order to get the right pointers on the right devices (for example: in order to devm_* allocate variables), let's introduce this helper which can safely be called by pipelined ECC engines in order to retrieve the right device structure. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211216111654.238086-16-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index b617efa0a881..615b3e3a3920 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -309,6 +309,7 @@ struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_on_die_hw_engine(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_on_host_hw_engine(struct nand_device *nand); void nand_ecc_put_on_host_hw_engine(struct nand_device *nand); +struct device *nand_ecc_get_engine_dev(struct device *host); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From 4398693a9e24bcab0b99ea219073917991d0792b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Feb 2022 11:48:31 +0100 Subject: gpiolib: make struct comments into real kernel docs We have several comments that start with '/**' but don't conform to the kernel doc standard. Add proper detailed descriptions for the affected definitions and move the docs from the forward declarations to the struct definitions where applicable. Reported-by: Randy Dunlap Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Tested-by: Randy Dunlap --- include/linux/gpio/consumer.h | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3ad67b4a72be..c3aa8b330e1c 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -8,27 +8,16 @@ #include struct device; - -/** - * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are - * preferable to the old integer-based handles. - * - * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid - * until the GPIO is released. - */ struct gpio_desc; - -/** - * Opaque descriptor for a structure of GPIO array attributes. This structure - * is attached to struct gpiod_descs obtained from gpiod_get_array() and can be - * passed back to get/set array functions in order to activate fast processing - * path if applicable. - */ struct gpio_array; /** - * Struct containing an array of descriptors that can be obtained using - * gpiod_get_array(). 
+ * struct gpio_descs - Struct containing an array of descriptors that can be + * obtained using gpiod_get_array() + * + * @info: Pointer to the opaque gpio_array structure + * @ndescs: Number of held descriptors + * @desc: Array of pointers to GPIO descriptors */ struct gpio_descs { struct gpio_array *info; @@ -43,8 +32,16 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** - * Optional flags that can be passed to one of gpiod_* to configure direction - * and output value. These values cannot be OR'd. + * enum gpiod_flags - Optional flags that can be passed to one of gpiod_* to + * configure direction and output value. These values + * cannot be OR'd. + * + * @GPIOD_ASIS: Don't change anything + * @GPIOD_IN: Set lines to input mode + * @GPIOD_OUT_LOW: Set lines to output and drive them low + * @GPIOD_OUT_HIGH: Set lines to output and drive them high + * @GPIOD_OUT_LOW_OPEN_DRAIN: Set lines to open-drain output and drive them low + * @GPIOD_OUT_HIGH_OPEN_DRAIN: Set lines to open-drain output and drive them high */ enum gpiod_flags { GPIOD_ASIS = 0, -- cgit v1.2.3 From 8dd8678e42b5a8bbd3e2c49cda76c743318c56b0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 5 Feb 2022 13:00:04 +0100 Subject: netfilter: ecache: don't use nf_conn spinlock For updating eache missed value we can use cmpxchg. This also avoids need to disable BH. kernel robot reported build failure on v1 because not all arches support cmpxchg for u16, so extend this to u32. This doesn't increase struct size, existing padding is used. Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 16bcff809b18..6c4c490a3e34 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -21,10 +21,10 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ enum nf_ct_ecache_state state:8;/* ecache state */ + u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; -- cgit v1.2.3 From 7afa38831aee2ca2f41b22747ed8545e1887aaa9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Feb 2022 12:29:47 +0100 Subject: netfilter: cttimeout: use option structure Instead of two exported functions, export a single option structure. 
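As a rough sketch of the provider side (not part of this patch), the cttimeout module can now publish one structure instead of assigning two separate hook pointers; the callback names below are taken from the existing module and shown purely for illustration:

	static const struct nf_ct_timeout_hooks hooks = {
		.timeout_find_get	= ctnl_timeout_find_get,
		.timeout_put		= ctnl_timeout_put,
	};

	/* publish a single pointer that consumers dereference */
	nf_ct_timeout_hook = &hooks;

Consumers then reach both callbacks through nf_ct_timeout_hook; how that pointer is protected against concurrent module unload (e.g. RCU) is an implementation detail not shown here.
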
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_timeout.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index db507e4a65bb..3ea94f6f3844 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -108,8 +108,12 @@ static inline void nf_ct_destroy_timeout(struct nf_conn *ct) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); -extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout); +struct nf_ct_timeout_hooks { + struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name); + void (*timeout_put)(struct nf_ct_timeout *timeout); +}; + +extern const struct nf_ct_timeout_hooks *nf_ct_timeout_hook; #endif #endif /* _NF_CONNTRACK_TIMEOUT_H */ -- cgit v1.2.3 From 23f68d462984bfda47c7bf663dca347e8e3df549 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 7 Feb 2022 19:25:08 +0100 Subject: netfilter: nft_cmp: optimize comparison for 16-bytes Allow up to 16-byte comparisons with a new cmp fast version. Use two 64-bit words and calculate the mask representing the bits to be compared. Make sure the comparison is 64-bit aligned and avoid out-of-bound memory access on registers. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index b6fb1fdff9b2..0ea7c55cea4d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -42,6 +42,14 @@ struct nft_cmp_fast_expr { bool inv; }; +struct nft_cmp16_fast_expr { + struct nft_data data; + struct nft_data mask; + u8 sreg; + u8 len; + bool inv; +}; + struct nft_immediate_expr { struct nft_data data; u8 dreg; @@ -59,6 +67,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) } extern const struct nft_expr_ops nft_cmp_fast_ops; +extern const struct nft_expr_ops nft_cmp16_fast_ops; struct nft_payload { enum nft_payload_bases base:8; -- cgit v1.2.3 From cfc56f85e72f5b9c5c5be26dc2b16518d36a7868 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 7 Feb 2022 18:13:18 +0100 Subject: net: do not keep the dst cache when uncloning an skb dst and its metadata When uncloning an skb dst and its associated metadata a new dst+metadata is allocated and the tunnel information from the old metadata is copied over there. The issue is the tunnel metadata has references to cached dst, which are copied along the way. When a dst+metadata refcount drops to 0 the metadata is freed including the cached dst entries. As they are also referenced in the initial dst+metadata, this ends up in UaFs. In practice the above did not happen because of another issue, the dst+metadata was never freed because its refcount never dropped to 0 (this will be fixed in a subsequent patch). Fix this by initializing the dst cache after copying the tunnel information from the old metadata to also unshare the dst cache. Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Cc: Paolo Abeni Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Acked-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/net/dst_metadata.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75d..b997e0c1e362 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,6 +123,19 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); -- cgit v1.2.3 From 9eeabdf17fa0ab75381045c867c370f4cc75a613 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 7 Feb 2022 18:13:19 +0100 Subject: net: fix a memleak when uncloning an skb dst and its metadata When uncloning an skb dst and its associated metadata, a new dst+metadata is allocated and later replaces the old one in the skb. This is helpful to have a non-shared dst+metadata attached to a specific skb. The issue is the uncloned dst+metadata is initialized with a refcount of 1, which is increased to 2 before attaching it to the skb. When tun_dst_unclone returns, the dst+metadata is only referenced from a single place (the skb) while its refcount is 2. Its refcount will never drop to 0 (when the skb is consumed), leading to a memory leak. Fix this by removing the call to dst_hold in tun_dst_unclone, as the dst+metadata refcount is already 1. Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.") Cc: Pravin B Shelar Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index b997e0c1e362..adab27ba1ecb 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -137,7 +137,6 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) #endif skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } -- cgit v1.2.3 From 8069b22d656f6e1922352bff90ab78e6fab73779 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 9 Feb 2022 12:05:55 +0800 Subject: mctp: Add helper for address match checking Currently, we have a couple of paths that check that an EID matches, or the match value is MCTP_ADDR_ANY. Rather than open coding this, add a little helper. Signed-off-by: Jeremy Kerr Signed-off-by: David S. 
Miller --- include/net/mctp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909..706d329dd8e8 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -45,6 +45,11 @@ static inline bool mctp_address_ok(mctp_eid_t eid) return eid >= 8 && eid < 255; } +static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) +{ + return match == eid || match == MCTP_ADDR_ANY; +} + static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) { return (struct mctp_hdr *)skb_network_header(skb); -- cgit v1.2.3 From 63ed1aab3d40aa61aaa66819bdce9377ac7f40fa Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Wed, 9 Feb 2022 12:05:57 +0800 Subject: mctp: Add SIOCMCTP{ALLOC,DROP}TAG ioctls for tag control This change adds a couple of new ioctls for mctp sockets: SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG. These ioctls provide facilities for explicit allocation / release of tags, overriding the automatic allocate-on-send/release-on-reply and timeout behaviours. This allows userspace more control over messages that may not fit a simple request/response model. In order to indicate a pre-allocated tag to the sendmsg() syscall, we introduce a new flag to the struct sockaddr_mctp.smctp_tag value: MCTP_TAG_PREALLOC. Additional changes from Jeremy Kerr . Contains a fix that was: Reported-by: kernel test robot Signed-off-by: Matt Johnston Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/net/mctp.h | 11 ++++++++++- include/trace/events/mctp.h | 5 ++++- include/uapi/linux/mctp.h | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 706d329dd8e8..e80a4baf8379 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -126,7 +126,7 @@ struct mctp_sock { */ struct mctp_sk_key { mctp_eid_t peer_addr; - mctp_eid_t local_addr; + mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ /* we hold a ref to sk when set */ @@ -163,6 +163,12 @@ struct mctp_sk_key { */ unsigned long dev_flow_state; struct mctp_dev *dev; + + /* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire + * automatically on timeout or response, instead SIOCMCTPDROPTAG + * is used. 
+ */ + bool manual_alloc; }; struct mctp_skb_cb { @@ -239,6 +245,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp); /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); diff --git a/include/trace/events/mctp.h b/include/trace/events/mctp.h index 175b057c507f..165cf25f77a7 100644 --- a/include/trace/events/mctp.h +++ b/include/trace/events/mctp.h @@ -15,6 +15,7 @@ enum { MCTP_TRACE_KEY_REPLIED, MCTP_TRACE_KEY_INVALIDATED, MCTP_TRACE_KEY_CLOSED, + MCTP_TRACE_KEY_DROPPED, }; #endif /* __TRACE_MCTP_ENUMS */ @@ -22,6 +23,7 @@ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED); +TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED); TRACE_EVENT(mctp_key_acquire, TP_PROTO(const struct mctp_sk_key *key), @@ -66,7 +68,8 @@ TRACE_EVENT(mctp_key_release, { MCTP_TRACE_KEY_TIMEOUT, "timeout" }, { MCTP_TRACE_KEY_REPLIED, "replied" }, { MCTP_TRACE_KEY_INVALIDATED, "invalidated" }, - { MCTP_TRACE_KEY_CLOSED, "closed" }) + { MCTP_TRACE_KEY_CLOSED, "closed" }, + { MCTP_TRACE_KEY_DROPPED, "dropped" }) ) ); diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 07b0318716fc..154ab56651f1 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -44,7 +44,25 @@ struct sockaddr_mctp_ext { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_TAG_PREALLOC 0x10 #define MCTP_OPT_ADDR_EXT 1 +#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) +#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) + +struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + + /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set). + */ + __u8 tag; + __u16 flags; +}; + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From a0386bba70934d42f586eaf68b21d5eeaffa7bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:52:01 +0100 Subject: spi: make remove callback a void function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value returned by an spi driver's remove function is mostly ignored. (Only an error message is printed if the value is non-zero that the error is ignored.) So change the prototype of the remove function to return no value. This way driver authors are not tempted to assume that passing an error to the upper layer is a good idea. All drivers are adapted accordingly. There is no intended change of behaviour, all callbacks were prepared to return 0 before. 
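A minimal before/after sketch of a driver conversion ("foo" names are placeholders, not taken from the tree):

	/* before */
	static int foo_remove(struct spi_device *spi)
	{
		foo_teardown(spi_get_drvdata(spi));
		return 0;	/* value was effectively ignored anyway */
	}

	/* after */
	static void foo_remove(struct spi_device *spi)
	{
		foo_teardown(spi_get_drvdata(spi));
	}

	static struct spi_driver foo_driver = {
		.probe	= foo_probe,
		.remove	= foo_remove,
	};
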
Signed-off-by: Uwe Kleine-König Acked-by: Marc Kleine-Budde Acked-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Acked-by: Jérôme Pouiller Acked-by: Miquel Raynal Acked-by: Jonathan Cameron Acked-by: Claudius Heine Acked-by: Stefan Schmidt Acked-by: Alexandre Belloni Acked-by: Ulf Hansson # For MMC Acked-by: Marcus Folkesson Acked-by: Łukasz Stelmach Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-6-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b804..c84e61b99c7b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -280,7 +280,7 @@ struct spi_message; struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); - int (*remove)(struct spi_device *spi); + void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; -- cgit v1.2.3 From 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:02:59 +0000 Subject: genirq: Allow the PM device to originate from irq domain As a preparation to moving the reference to the device used for runtime power management, add a new 'dev' field to the irqdomain structure for that exact purpose. The irq_chip_pm_{get,put}() helpers are made aware of the dual location via a new private helper. No functional change intended. Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Tested-by: Tony Lindgren Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org --- include/linux/irqdomain.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d476405802e9..be25a33293e5 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -151,6 +151,8 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to a device that the domain represent, and that will be + * used for power management purposes. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * * Revmap data, used internally by irq_domain @@ -171,6 +173,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif @@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } +static inline void irq_domain_set_pm_device(struct irq_domain *d, + struct device *dev) +{ + if (d) + d->dev = dev; +} + #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); -- cgit v1.2.3 From c306d737691ef84305d4ed0d302c63db2932f0bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 15:57:45 -0500 Subject: NFSD: Deprecate NFS_OFFSET_MAX NFS_OFFSET_MAX was introduced way back in Linux v2.3.y before there was a kernel-wide OFFSET_MAX value. As a clean up, replace the last few uses of it with its generic equivalent, and get rid of it. 
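The replacement itself is mechanical; an illustrative (not verbatim) example of the substitution:

	/* before: NFSD-private constant */
	end = NFS_OFFSET_MAX;

	/* after: generic kernel-wide constant from <linux/fs.h> */
	end = OFFSET_MAX;
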
Signed-off-by: Chuck Lever --- include/linux/nfs.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 0dc7ad38a0da..b06375e88e58 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } - -/* - * This is really a general kernel constant, but since nothing like - * this is defined in the kernel headers, I have to do it here. - */ -#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) - - enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, -- cgit v1.2.3 From 9a69e2b385f443f244a7e8b8bcafe5ccfb0866b4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 9 Feb 2022 19:43:32 +0100 Subject: bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide remote_port is another case of a BPF context field documented as a 32-bit value in network byte order for which the BPF context access converter generates a load of a zero-padded 16-bit integer in network byte order. First such case was dst_port in bpf_sock which got addressed in commit 4421a582718a ("bpf: Make dst_port field in struct bpf_sock 16-bit wide"). Loading 4-bytes from the remote_port offset and converting the value with bpf_ntohl() leads to surprising results, as the expected value is shifted by 16 bits. Reduce the confusion by splitting the field in two - a 16-bit field holding a big-endian integer, and a 16-bit zero-padding anonymous field that follows it. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220209184333.654927-2-jakub@cloudflare.com --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a7f0ddedac1f..afe3d0d7f5f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6453,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ -- cgit v1.2.3 From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. 
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3 From 5cad527d5ffa9a1c4731bb9c97d2ee93f8960d50 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Feb 2022 14:08:38 +0800 Subject: net: drop_monitor: support drop reason In the commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") drop reason is introduced to the tracepoint of kfree_skb. Therefore, drop_monitor is able to report the drop reason to users by netlink. The drop reasons are reported as string to users, which is exactly the same as what we do when reporting it to ftrace. Signed-off-by: Menglong Dong Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220209060838.55513-1-imagedong@tencent.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_dropmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..1bbea8f0681e 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -93,6 +93,7 @@ enum net_dm_attr { NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ + NET_DM_ATTR_REASON, /* string */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 -- cgit v1.2.3 From 70e038f89b467995708207fb57bbf46aec32dc2c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 16 Dec 2021 12:16:42 +0100 Subject: mtd: nand: mxic-ecc: Support SPI pipelined mode Introduce the support for another possible configuration: the ECC engine may work as DMA master (pipelined) and move itself the data to/from the NAND chip into the buffer, applying the necessary corrections/computations on the fly. This driver offers an ECC engine implementation that must be instatiated from a SPI controller driver. 
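A sketch of the instantiation from the SPI controller side (error handling trimmed; the platform_device pointer is assumed to be the controller's own):

	struct nand_ecc_engine *eng;

	eng = mxic_ecc_get_pipelined_engine(pdev);
	if (IS_ERR(eng))
		return PTR_ERR(eng);

	/* ... use the engine for on-the-fly corrections ... */

	mxic_ecc_put_pipelined_engine(eng);
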
Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211216111654.238086-17-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-mxic.h | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/linux/mtd/nand-ecc-mxic.h (limited to 'include') diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h new file mode 100644 index 000000000000..f3aa1ac82aed --- /dev/null +++ b/include/linux/mtd/nand-ecc-mxic.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2019 Macronix + * Author: Miquèl Raynal + * + * Header for the Macronix external ECC engine. + */ + +#ifndef __MTD_NAND_ECC_MXIC_H__ +#define __MTD_NAND_ECC_MXIC_H__ + +#include +#include + +struct mxic_ecc_engine; + +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) + +struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); +struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev); +void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng); +int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, + unsigned int direction, dma_addr_t dirmap); + +#else /* !CONFIG_MTD_NAND_ECC_MXIC */ + +static inline struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) +{ + return NULL; +} + +static inline struct nand_ecc_engine * +mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng) {} + +static inline int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, + unsigned int direction, + dma_addr_t dirmap) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_MTD_NAND_ECC_MXIC */ + +#endif /* __MTD_NAND_ECC_MXIC_H__ */ -- cgit v1.2.3 From 4a3cc7fb6e63bcfdedec25364738f1493345bd20 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Jan 2022 10:17:56 +0100 Subject: spi: spi-mem: Introduce a capability structure Create a spi_controller_mem_caps structure and put it within the spi_controller structure close to the spi_controller_mem_ops strucure. So far the only field in this structure is the support for dtr operations, but soon we will add another parameter. Also create a helper to parse the capabilities and check if the requested capability has been set or not. 
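A sketch of how a controller driver is expected to use this ("foo" names are placeholders):

	static const struct spi_controller_mem_caps foo_mem_caps = {
		.dtr = true,
	};

	/* at probe time */
	ctlr->mem_ops  = &foo_mem_ops;
	ctlr->mem_caps = &foo_mem_caps;

	/* later, when validating an operation */
	if (op->cmd.dtr && !spi_mem_controller_is_capable(ctlr, dtr))
		return false;
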
Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Reviewed-by: Boris Brezillon Reviewed-by: Tudor Ambarus Reviewed-by: Mark Brown Link: https://lore.kernel.org/linux-mtd/20220127091808.1043392-2-miquel.raynal@bootlin.com --- include/linux/spi/spi-mem.h | 11 +++++++++++ include/linux/spi/spi.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 85e2ff7b840d..38e5d45c9842 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -285,6 +285,17 @@ struct spi_controller_mem_ops { unsigned long timeout_ms); }; +/** + * struct spi_controller_mem_caps - SPI memory controller capabilities + * @dtr: Supports DTR operations + */ +struct spi_controller_mem_caps { + bool dtr; +}; + +#define spi_mem_controller_is_capable(ctlr, cap) \ + ((ctlr)->mem_caps && (ctlr)->mem_caps->cap) + /** * struct spi_mem_driver - SPI memory driver * @spidrv: inherit from a SPI driver diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b804..cf99a1ee0e74 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -23,6 +23,7 @@ struct ptp_system_timestamp; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; +struct spi_controller_mem_caps; /* * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, @@ -415,6 +416,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. + * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per @@ -639,6 +641,7 @@ struct spi_controller { /* Optimized handlers for SPI memory-like operations. */ const struct spi_controller_mem_ops *mem_ops; + const struct spi_controller_mem_caps *mem_caps; /* gpio chip select */ int *cs_gpios; -- cgit v1.2.3 From 9a15efc5d5e6b5beaed0883e5bdcd0b1384c1b20 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Jan 2022 10:18:00 +0100 Subject: spi: spi-mem: Kill the spi_mem_dtr_supports_op() helper Now that spi_mem_default_supports_op() has access to the static controller capabilities (relating to memory operations), and now that these capabilities have been filled by the relevant controllers, there is no need for a specific helper checking only DTR operations, so let's just kill spi_mem_dtr_supports_op() and simply use spi_mem_default_supports_op() instead. 
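After this change a controller's ->supports_op() reduces to controller-specific checks plus the generic helper; a sketch (the size limit is an invented example):

	static bool foo_supports_op(struct spi_mem *mem,
				    const struct spi_mem_op *op)
	{
		/* controller-specific restriction, purely illustrative */
		if (op->data.nbytes > FOO_MAX_XFER_LEN)
			return false;

		/* DTR and other generic checks live in the core helper */
		return spi_mem_default_supports_op(mem, op);
	}
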
Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Reviewed-by: Boris Brezillon Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/linux-mtd/20220127091808.1043392-6-miquel.raynal@bootlin.com --- include/linux/spi/spi-mem.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 38e5d45c9842..4a1bfe689872 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -330,10 +330,6 @@ void spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr, bool spi_mem_default_supports_op(struct spi_mem *mem, const struct spi_mem_op *op); - -bool spi_mem_dtr_supports_op(struct spi_mem *mem, - const struct spi_mem_op *op); - #else static inline int spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr, @@ -356,13 +352,6 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, { return false; } - -static inline -bool spi_mem_dtr_supports_op(struct spi_mem *mem, - const struct spi_mem_op *op) -{ - return false; -} #endif /* CONFIG_SPI_MEM */ int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op); -- cgit v1.2.3 From a433c2cbd75ab76f277364f44e76f32c7df306e7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Jan 2022 10:18:01 +0100 Subject: spi: spi-mem: Add an ecc parameter to the spi_mem_op structure Soon the SPI-NAND core will need a way to request a SPI controller to enable ECC support for a given operation. This is because of the pipelined integration of certain ECC engines, which are directly managed by the SPI controller itself. Introduce a spi_mem_op additional field for this purpose: ecc. So far this field is left unset and checked to be false by all the SPI controller drivers in their ->supports_op() hook, as they all call spi_mem_default_supports_op(). Signed-off-by: Miquel Raynal Acked-by: Pratyush Yadav Reviewed-by: Boris Brezillon Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/linux-mtd/20220127091808.1043392-7-miquel.raynal@bootlin.com --- include/linux/spi/spi-mem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 4a1bfe689872..2ba044d0d5e5 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -89,6 +89,7 @@ enum spi_mem_data_dir { * @dummy.dtr: whether the dummy bytes should be sent in DTR mode or not * @data.buswidth: number of IO lanes used to send/receive the data * @data.dtr: whether the data should be sent in DTR mode or not + * @data.ecc: whether error correction is required or not * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. 
Can be zero if the * operation does not involve transferring data @@ -119,6 +120,7 @@ struct spi_mem_op { struct { u8 buswidth; u8 dtr : 1; + u8 ecc : 1; enum spi_mem_data_dir dir; unsigned int nbytes; union { @@ -288,9 +290,11 @@ struct spi_controller_mem_ops { /** * struct spi_controller_mem_caps - SPI memory controller capabilities * @dtr: Supports DTR operations + * @ecc: Supports operations with error correction */ struct spi_controller_mem_caps { bool dtr; + bool ecc; }; #define spi_mem_controller_is_capable(ctlr, cap) \ -- cgit v1.2.3 From f9d7c7265bcff7d9a17425a8cddf702e8fe159c2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Jan 2022 10:18:03 +0100 Subject: mtd: spinand: Create direct mapping descriptors for ECC operations In order for pipelined ECC engines to be able to enable/disable the ECC engine only when needed and avoid races when future parallel-operations will be supported, we need to provide the information about the use of the ECC engine in the direct mapping hooks. As direct mapping configurations are meant to be static, it is best to create two new mappings: one for regular 'raw' accesses and one for accesses involving correction. It is up to the driver to use or not the new ECC enable boolean contained in the spi-mem operation. As dirmaps are not free (they consume a few pages of MMIO address space) and because these extra entries are only meant to be used by pipelined engines, let's limit their use to this specific type of engine and save a bit of memory with all the other setups. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20220127091808.1043392-9-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 6988956b8492..3aa28240a77f 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -389,6 +389,8 @@ struct spinand_info { struct spinand_dirmap { struct spi_mem_dirmap_desc *wdesc; struct spi_mem_dirmap_desc *rdesc; + struct spi_mem_dirmap_desc *wdesc_ecc; + struct spi_mem_dirmap_desc *rdesc_ecc; }; /** -- cgit v1.2.3 From 00360ebae483e603d55ec9a7231b787cb80ffe13 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 2 Feb 2022 15:45:36 +0100 Subject: spi: mxic: Add support for pipelined ECC operations Some SPI-NAND chips do not have a proper on-die ECC engine providing error correction/detection. This is particularly an issue on embedded devices with limited resources because all the computations must happen in software, unless an external hardware engine is provided. These external engines are new and can be of two categories: external or pipelined. Macronix is providing both, the former being already supported. The second, however, is very SoC implementation dependent and must be instantiated by the SPI host controller directly. An entire subsystem has been contributed to support these engines which makes the insertion into another subsystem such as SPI quite straightforward without the need for a lot of specific functions. 
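As a sketch of how such a controller can react to the new per-operation ECC request (helper names are illustrative, not the actual driver code):

	/* in the data path, before starting the transfer */
	if (op->data.ecc)
		foo_pipelined_ecc_enable(host);
	else
		foo_pipelined_ecc_disable(host);
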
Signed-off-by: Miquel Raynal Reviewed-by: Mark Brown Link: https://lore.kernel.org/linux-mtd/20220202144536.393792-1-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-mxic.h | 2 +- include/linux/mtd/nand.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h index f3aa1ac82aed..b125926e458c 100644 --- a/include/linux/mtd/nand-ecc-mxic.h +++ b/include/linux/mtd/nand-ecc-mxic.h @@ -14,7 +14,7 @@ struct mxic_ecc_engine; -#if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) && IS_REACHABLE(CONFIG_MTD_NAND_CORE) struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 615b3e3a3920..c3693bb87b4c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -303,8 +303,23 @@ int nand_ecc_prepare_io_req(struct nand_device *nand, int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); + +#if IS_REACHABLE(CONFIG_MTD_NAND_CORE) int nand_ecc_register_on_host_hw_engine(struct nand_ecc_engine *engine); int nand_ecc_unregister_on_host_hw_engine(struct nand_ecc_engine *engine); +#else +static inline int +nand_ecc_register_on_host_hw_engine(struct nand_ecc_engine *engine) +{ + return -ENOTSUPP; +} +static inline int +nand_ecc_unregister_on_host_hw_engine(struct nand_ecc_engine *engine) +{ + return -ENOTSUPP; +} +#endif + struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_on_die_hw_engine(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_on_host_hw_engine(struct nand_device *nand); -- cgit v1.2.3 From beb0622138cd2848dec06b0651a988c39d099574 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:10 +0000 Subject: genirq: Kill irq_chip::parent_device Now that noone is using irq_chip::parent_device in the tree, get rid of it. Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-13-maz@kernel.org --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 848e1e12c5c6..2cb2e2ac2703 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) /** * struct irq_chip - hardware interrupt chip descriptor * - * @parent_device: pointer to parent device for irqchip * @name: name for /proc/interrupts * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) @@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @flags: chip specific flags */ struct irq_chip { - struct device *parent_device; const char *name; unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); -- cgit v1.2.3 From 5fdc1242453e2ae88b2cdb607e4eda6b687f084c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 10 Feb 2022 17:05:18 +0200 Subject: ASoC: SOF: Move the definition of enum sof_dsp_power_states to global header Move the enum sof_dsp_power_states to include/sound/sof.h to be accessible outside of the core SOF stack. 
Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220210150525.30756-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index 813680ab9aad..7cdfc954df12 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -39,6 +39,14 @@ enum sof_fw_state { SOF_FW_CRASHED, }; +/* DSP power states */ +enum sof_dsp_power_states { + SOF_DSP_PM_D0, + SOF_DSP_PM_D1, + SOF_DSP_PM_D2, + SOF_DSP_PM_D3, +}; + /* * SOF Platform data. */ -- cgit v1.2.3 From ede6c39c4f9068cbeb4036448c45fff5393e0432 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Feb 2022 18:59:32 -0800 Subject: net: make net->dev_unreg_count atomic Having to acquire rtnl from netdev_run_todo() for every dismantled device is not desirable when/if rtnl is under stress. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 374cc7b260fc..c4f5601f6e32 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -63,7 +63,7 @@ struct net { */ spinlock_t rules_mod_lock; - unsigned int dev_unreg_count; + atomic_t dev_unreg_count; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; -- cgit v1.2.3 From bd1ba5732bb954c31e2be07e8ee1397a910835e4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 22:09:20 +0000 Subject: KVM: x86: Get the number of Hyper-V sparse banks from the VARHEAD field Get the number of sparse banks from the VARHEAD field, which the guest is required to provide as "The size of a variable header, in QWORDS.", where the variable header is: Variable Header Bytes = {Total Header Bytes - sizeof(Fixed Header)} rounded up to nearest multiple of 8 Variable HeaderSize = Variable Header Bytes / 8 In other words, the VARHEAD should match the number of sparse banks. Keep the manual count as a sanity check, but otherwise rely on the field so as to more closely align with the logic defined in the TLFS and to allow for future cleanups. Tweak the tracepoint output to use "rep_cnt" instead of simply "cnt" now that there is also "var_cnt". 
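A sketch of the extraction the new mask enables (variable names are illustrative):

	u64 param;	/* 64-bit hypercall input value from the guest */
	u64 var_cnt;	/* size of the variable header, in QWORDs */

	var_cnt = (param & HV_HYPERCALL_VARHEAD_MASK) >>
		  HV_HYPERCALL_VARHEAD_OFFSET;

	/* for the sparse-bank hypercalls, var_cnt is expected to match the
	 * number of sparse banks claimed in the fixed header
	 */
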
Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Message-Id: <20211207220926.718794-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/asm-generic/hyperv-tlfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 8f97c2927bee..b8babc4d3fe4 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -183,6 +183,7 @@ enum HV_GENERIC_SET_FORMAT { #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17) #define HV_HYPERCALL_REP_COMP_OFFSET 32 #define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -- cgit v1.2.3 From 413af6601f7613c07d8c36b57e184d7841ace43a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 22:09:26 +0000 Subject: KVM: x86: Add checks for reserved-to-zero Hyper-V hypercall fields Add checks for the three fields in Hyper-V's hypercall params that must be zero. Per the TLFS, HV_STATUS_INVALID_HYPERCALL_INPUT is returned if "A reserved bit in the specified hypercall input value is non-zero." Note, some versions of the TLFS have an off-by-one bug for the last reserved field, and define it as being bits 64:60. See https://github.com/MicrosoftDocs/Virtualization-Documentation/pull/1682. Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Message-Id: <20211207220926.718794-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/asm-generic/hyperv-tlfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index b8babc4d3fe4..fdce7a4cfc6f 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -184,11 +184,17 @@ enum HV_GENERIC_SET_FORMAT { #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 #define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17) +#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27) #define HV_HYPERCALL_REP_COMP_OFFSET 32 #define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) +#define HV_HYPERCALL_RSVD1_MASK GENMASK_ULL(47, 44) #define HV_HYPERCALL_REP_START_OFFSET 48 #define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) +#define HV_HYPERCALL_RSVD2_MASK GENMASK_ULL(63, 60) +#define HV_HYPERCALL_RSVD_MASK (HV_HYPERCALL_RSVD0_MASK | \ + HV_HYPERCALL_RSVD1_MASK | \ + HV_HYPERCALL_RSVD2_MASK) /* hypercall status code */ #define HV_STATUS_SUCCESS 0 -- cgit v1.2.3 From 27c196c7b73cb70bbed3a9df46563bab60e63415 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 9 Feb 2022 11:27:09 -0600 Subject: kernel/resource: Introduce request_mem_region_muxed() Support for requesting muxed memory region is implemented but not currently callable as a macro. Add the request muxed memory region macro. MMIO memory accesses can be synchronized using request_mem_region() which is already available. This call will return failure if the resource is busy. The 'muxed' version of this macro will handle a busy resource by using a wait queue to retry until the resource is available. 
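Usage follows the plain request_mem_region() pattern; a sketch (resource values and the name string are placeholders):

	/* sleeps until the region is released by another IORESOURCE_MUXED
	 * holder instead of failing immediately
	 */
	if (!request_mem_region_muxed(res_start, res_size, "foo-mux"))
		return -EBUSY;

	/* ... MMIO access to the shared region ... */

	release_mem_region(res_start, res_size);
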
Signed-off-by: Terry Bowman Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/ioport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8359c50f9988..ec5f71f7135b 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -262,6 +262,8 @@ resource_union(struct resource *r1, struct resource *r2, struct resource *r) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_muxed(start, n, name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_MUXED) #define request_mem_region_exclusive(start,n,name) \ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) -- cgit v1.2.3 From 85cedb4e0c9d3b08c28e164e592b3a329e3dd5fa Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 9 Feb 2022 19:45:07 +0100 Subject: dt-bindings: clock: Add qualcomm QCM2290 DISPCC bindings Add device tree bindings for display clock controller on QCM2290 SoCs. Signed-off-by: Loic Poulain Reviewed-by: Bjorn Andersson Reviewed-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1644432308-21099-1-git-send-email-loic.poulain@linaro.org --- include/dt-bindings/clock/qcom,dispcc-qcm2290.h | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,dispcc-qcm2290.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,dispcc-qcm2290.h b/include/dt-bindings/clock/qcom,dispcc-qcm2290.h new file mode 100644 index 000000000000..1db513d6b3ee --- /dev/null +++ b/include/dt-bindings/clock/qcom,dispcc-qcm2290.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_QCM2290_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_QCM2290_H + +/* DISP_CC clocks */ +#define DISP_CC_PLL0 0 +#define DISP_CC_MDSS_AHB_CLK 1 +#define DISP_CC_MDSS_AHB_CLK_SRC 2 +#define DISP_CC_MDSS_BYTE0_CLK 3 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define DISP_CC_MDSS_ESC0_CLK 7 +#define DISP_CC_MDSS_ESC0_CLK_SRC 8 +#define DISP_CC_MDSS_MDP_CLK 9 +#define DISP_CC_MDSS_MDP_CLK_SRC 10 +#define DISP_CC_MDSS_MDP_LUT_CLK 11 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 12 +#define DISP_CC_MDSS_PCLK0_CLK 13 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 14 +#define DISP_CC_MDSS_VSYNC_CLK 15 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 16 +#define DISP_CC_SLEEP_CLK 17 +#define DISP_CC_SLEEP_CLK_SRC 18 +#define DISP_CC_XO_CLK 19 +#define DISP_CC_XO_CLK_SRC 20 + +#define MDSS_GDSC 0 + +#endif -- cgit v1.2.3 From 8008e7902f28eb9e5459b21d375b3e5b4090efff Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 28 Jan 2022 13:17:09 +0530 Subject: soc: qcom: llcc: Update the logic for version info extraction LLCC HW version info is made up of major, branch, minor and echo version bits each of which are 8bits. 
Several features in newer LLCC HW are based on the full version rather than just major or minor versions such as write-subcache enable which is applicable for versions v2.0.0.0 and later, also upcoming write-subcache cacheable for SM8450 SoC which is only present in versions v2.1.0.0 and later, so it makes it easier and cleaner to just directly compare with the full version than adding additional major/branch/ minor/echo version checks. So remove the earlier major version check and add full version check for those features. Signed-off-by: Sai Prakash Ranjan Tested-by: Vinod Koul Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/a82d7c32348c51fcc2b63e220d91b318bf706c83.1643355594.git.quic_saipraka@quicinc.com --- include/linux/soc/qcom/llcc-qcom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e8fd92c96b7..beecf00b707d 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -83,7 +83,7 @@ struct llcc_edac_reg_data { * @bitmap: Bit map to track the active slice ids * @offsets: Pointer to the bank offsets array * @ecc_irq: interrupt for llcc cache error detection and reporting - * @major_version: Indicates the LLCC major version + * @version: Indicates the LLCC version */ struct llcc_drv_data { struct regmap *regmap; @@ -96,7 +96,7 @@ struct llcc_drv_data { unsigned long *bitmap; u32 *offsets; int ecc_irq; - u32 major_version; + u32 version; }; #if IS_ENABLED(CONFIG_QCOM_LLCC) -- cgit v1.2.3 From a6e9d7ef252c44a4f33b4403cd367430697dd9be Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 28 Jan 2022 13:17:13 +0530 Subject: soc: qcom: llcc: Add configuration data for SM8450 SoC Add LLCC configuration data for SM8450 SoC. Signed-off-by: Sai Prakash Ranjan Tested-by: Vinod Koul Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/fec944cb8f2a4a70785903c6bfec629c6f31b6a4.1643355594.git.quic_saipraka@quicinc.com --- include/linux/soc/qcom/llcc-qcom.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index beecf00b707d..0bc21ee58fac 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -35,7 +35,12 @@ #define LLCC_WRCACHE 31 #define LLCC_CVPFW 32 #define LLCC_CPUSS1 33 +#define LLCC_CAMEXP0 34 +#define LLCC_CPUMTE 35 #define LLCC_CPUHWT 36 +#define LLCC_MDMCLAD2 37 +#define LLCC_CAMEXP1 38 +#define LLCC_AENPU 45 /** * struct llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From 7a7b1414ea9a3641672be768afe16f583f0b76e7 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 25 Jan 2022 11:11:19 -0600 Subject: dt-bindings: power: imx8mq: add defines for VPU blk-ctrl domains This adds the defines for the power domains provided by the VPU blk-ctrl on the i.MX8MQ. 
Signed-off-by: Lucas Stach Acked-by: Rob Herring Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- include/dt-bindings/power/imx8mq-power.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/imx8mq-power.h b/include/dt-bindings/power/imx8mq-power.h index 8a513bd9166e..9f7d0f1e7c32 100644 --- a/include/dt-bindings/power/imx8mq-power.h +++ b/include/dt-bindings/power/imx8mq-power.h @@ -18,4 +18,7 @@ #define IMX8M_POWER_DOMAIN_MIPI_CSI2 9 #define IMX8M_POWER_DOMAIN_PCIE2 10 +#define IMX8MQ_VPUBLK_PD_G1 0 +#define IMX8MQ_VPUBLK_PD_G2 1 + #endif -- cgit v1.2.3 From 7f5a08c79df35e68f1a43033450c5050f12bc155 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 18 Jan 2022 12:43:15 -0800 Subject: user_events: Add minimal support for trace_event into ftrace Minimal support for interacting with dynamic events, trace_event and ftrace. Core outline of flow between user process, ioctl and trace_event APIs. User mode processes that wish to use trace events to get data into ftrace, perf, eBPF, etc are limited to uprobes today. The user events features enables an ABI for user mode processes to create and write to trace events that are isolated from kernel level trace events. This enables a faster path for tracing from user mode data as well as opens managed code to participate in trace events, where stub locations are dynamic. User processes often want to trace only when it's useful. To enable this a set of pages are mapped into the user process space that indicate the current state of the user events that have been registered. User processes can check if their event is hooked to a trace/probe, and if it is, emit the event data out via the write() syscall. Two new files are introduced into tracefs to accomplish this: user_events_status - This file is mmap'd into participating user mode processes to indicate event status. user_events_data - This file is opened and register/delete ioctl's are issued to create/open/delete trace events that can be used for tracing. The typical scenario is on process start to mmap user_events_status. Processes then register the events they plan to use via the REG ioctl. The ioctl reads and updates the passed in user_reg struct. The status_index of the struct is used to know the byte in the status page to check for that event. The write_index of the struct is used to describe that event when writing out to the fd that was used for the ioctl call. The data must always include this index first when writing out data for an event. Data can be written either by write() or by writev(). For example, in memory: int index; char data[]; Psuedo code example of typical usage: struct user_reg reg; int page_fd = open("user_events_status", O_RDWR); char *page_data = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, page_fd, 0); close(page_fd); int data_fd = open("user_events_data", O_RDWR); reg.size = sizeof(reg); reg.name_args = (__u64)"test"; ioctl(data_fd, DIAG_IOCSREG, ®); int status_id = reg.status_index; int write_id = reg.write_index; struct iovec io[2]; io[0].iov_base = &write_id; io[0].iov_len = sizeof(write_id); io[1].iov_base = payload; io[1].iov_len = sizeof(payload); if (page_data[status_id]) writev(data_fd, io, 2); User events are also exposed via the dynamic_events tracefs file for both create and delete. Current status is exposed via the user_events_status tracefs file. 
Simple example to register a user event via dynamic_events: echo u:test >> dynamic_events cat dynamic_events u:test If an event is hooked to a probe, the probe hooked shows up: echo 1 > events/user_events/test/enable cat user_events_status 1:test # Used by ftrace Active: 1 Busy: 1 Max: 4096 If an event is not hooked to a probe, no probe status shows up: echo 0 > events/user_events/test/enable cat user_events_status 1:test Active: 1 Busy: 0 Max: 4096 Users can describe the trace event format via the following format: name[:FLAG1[,FLAG2...] [field1[;field2...]] Each field has the following format: type name Example for char array with a size of 20 named msg: echo 'u:detailed char[20] msg' >> dynamic_events cat dynamic_events u:detailed char[20] msg Data offsets are based on the data written out via write() and will be updated to reflect the correct offset in the trace_event fields. For dynamic data it is recommended to use the new __rel_loc data type. This type will be the same as __data_loc, but the offset is relative to this entry. This allows user_events to not worry about what common fields are being inserted before the data. The above format is valid for both the ioctl and the dynamic_events file. Link: https://lkml.kernel.org/r/20220118204326.2169-2-beaub@linux.microsoft.com Acked-by: Masami Hiramatsu Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 116 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 include/uapi/linux/user_events.h (limited to 'include') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h new file mode 100644 index 000000000000..e570840571e1 --- /dev/null +++ b/include/uapi/linux/user_events.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. + * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ +#define FLAG_BPF_ITER (1 << 0) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. 
+ */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +/* Data type that was passed to the BPF program */ +enum { + /* Data resides in kernel space */ + USER_BPF_DATA_KERNEL, + + /* Data resides in user space */ + USER_BPF_DATA_USER, + + /* Data is a pointer to a user_bpf_iter structure */ + USER_BPF_DATA_ITER, +}; + +/* + * Describes an iovec iterator that BPF programs can use to access data for + * a given user_event write() / writev() call. + */ +struct user_bpf_iter { + + /* Offset of the data within the first iovec */ + __u32 iov_offset; + + /* Number of iovec structures */ + __u32 nr_segs; + + /* Pointer to iovec structures */ + const struct iovec *iov; +}; + +/* Context that BPF programs receive when attached to a user_event */ +struct user_bpf_context { + + /* Data type being passed (see union below) */ + __u32 data_type; + + /* Length of the data */ + __u32 data_len; + + /* Pointer to data, varies by data type */ + union { + /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ + void *kdata; + + /* User data (data_type == USER_BPF_DATA_USER) */ + void *udata; + + /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ + struct user_bpf_iter *iter; + }; +}; + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From 0f4b58423f3500ee3e3159fbbd6c41a6e6f920d4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:50 +0200 Subject: drm/dp: add drm_dp_128b132b_read_aux_rd_interval() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP 2.0 errata changes DP_128B132B_TRAINING_AUX_RD_INTERVAL (DPCD 0x2216) completely. Add a new function to read that. Follow-up will need to clean up existing functions. 
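As a sketch only (not part of this patch), a caller in a link-training loop might wrap the new helper roughly as below; the function name is made up and it assumes the helper reports the interval in microseconds:

	static void wait_128b132b_aux_rd_interval(struct drm_dp_aux *aux)
	{
		int delay_us = drm_dp_128b132b_read_aux_rd_interval(aux);

		usleep_range(delay_us, delay_us * 2);
	}
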
v2: fix reversed interpretation of bit 7 meaning (Uma) Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/22f6637194c9edb22b6a84be82dd385550dbb958.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 30359e434c3f..629ac2272e55 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1115,6 +1115,7 @@ struct drm_panel; # define DP_UHBR13_5 (1 << 2) #define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_1MS_UNIT (1 << 7) # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US 0x00 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS 0x01 @@ -1552,6 +1553,8 @@ void drm_dp_link_train_channel_eq_delay(const struct drm_dp_aux *aux, void drm_dp_lttpr_link_train_channel_eq_delay(const struct drm_dp_aux *aux, const u8 caps[DP_LTTPR_PHY_CAP_SIZE]); +int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux); + u8 drm_dp_link_rate_to_bw_code(int link_rate); int drm_dp_bw_code_to_link_rate(u8 link_bw); -- cgit v1.2.3 From 0192c25c03cd2feaeaadae375fe6aadff788939a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:51 +0200 Subject: drm/dp: add 128b/132b link status helpers from DP 2.0 E11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP 2.0 errata redefines link training. There are some new status bits, and some of the old ones need to be checked independently. Add helpers to do this. Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/5a46260d1f171fed46d0ab8fe4b6499abd65ce24.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 629ac2272e55..c9ca38941514 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -738,11 +738,13 @@ struct drm_panel; DP_LANE_CHANNEL_EQ_DONE | \ DP_LANE_SYMBOL_LOCKED) -#define DP_LANE_ALIGN_STATUS_UPDATED 0x204 - -#define DP_INTERLANE_ALIGN_DONE (1 << 0) -#define DP_DOWNSTREAM_PORT_STATUS_CHANGED (1 << 6) -#define DP_LINK_STATUS_UPDATED (1 << 7) +#define DP_LANE_ALIGN_STATUS_UPDATED 0x204 +#define DP_INTERLANE_ALIGN_DONE (1 << 0) +#define DP_128B132B_DPRX_EQ_INTERLANE_ALIGN_DONE (1 << 2) /* 2.0 E11 */ +#define DP_128B132B_DPRX_CDS_INTERLANE_ALIGN_DONE (1 << 3) /* 2.0 E11 */ +#define DP_128B132B_LT_FAILED (1 << 4) /* 2.0 E11 */ +#define DP_DOWNSTREAM_PORT_STATUS_CHANGED (1 << 6) +#define DP_LINK_STATUS_UPDATED (1 << 7) #define DP_SINK_STATUS 0x205 # define DP_RECEIVE_PORT_0_STATUS (1 << 0) @@ -1554,6 +1556,13 @@ void drm_dp_lttpr_link_train_channel_eq_delay(const struct drm_dp_aux *aux, const u8 caps[DP_LTTPR_PHY_CAP_SIZE]); int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux); +bool drm_dp_128b132b_lane_channel_eq_done(const u8 link_status[DP_LINK_STATUS_SIZE], + int lane_count); +bool drm_dp_128b132b_lane_symbol_locked(const u8 link_status[DP_LINK_STATUS_SIZE], + int lane_count); +bool drm_dp_128b132b_eq_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]); +bool 
drm_dp_128b132b_cds_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]); +bool drm_dp_128b132b_link_training_failed(const u8 link_status[DP_LINK_STATUS_SIZE]); u8 drm_dp_link_rate_to_bw_code(int link_rate); int drm_dp_bw_code_to_link_rate(u8 link_bw); -- cgit v1.2.3 From 427153ef63a82a4d51c6046e2457787127f4d6d7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:52 +0200 Subject: drm/dp: add some new DPCD macros from DP 2.0 E11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some of the new additions from DP 2.0 E11. Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/ec9c1b94858de36b9f4ef6c197effa4ca667afc3.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c9ca38941514..c48bf958a967 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -560,6 +560,7 @@ struct drm_panel; # define DP_TRAINING_PATTERN_DISABLE 0 # define DP_TRAINING_PATTERN_1 1 # define DP_TRAINING_PATTERN_2 2 +# define DP_TRAINING_PATTERN_2_CDS 3 /* 2.0 E11 */ # define DP_TRAINING_PATTERN_3 3 /* 1.2 */ # define DP_TRAINING_PATTERN_4 7 /* 1.4 */ # define DP_TRAINING_PATTERN_MASK 0x3 @@ -1353,6 +1354,7 @@ struct drm_panel; # define DP_PHY_REPEATER_128B132B_SUPPORTED (1 << 0) /* See DP_128B132B_SUPPORTED_LINK_RATES for values */ #define DP_PHY_REPEATER_128B132B_RATES 0xf0007 /* 2.0 */ +#define DP_PHY_REPEATER_EQ_DONE 0xf0008 /* 2.0 E11 */ enum drm_dp_phy { DP_PHY_DPRX, -- cgit v1.2.3 From 297565aa22cfa80ab0f88c3569693aea0b6afb6d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 5 Feb 2022 16:23:45 +0100 Subject: lib/xor: make xor prototypes more friendly to compiler vectorization Modern compilers are perfectly capable of extracting parallelism from the XOR routines, provided that the prototypes reflect the nature of the input accurately, in particular, the fact that the input vectors are expected not to overlap. This is not documented explicitly, but is implied by the interchangeability of the various C routines, some of which use temporary variables while others don't: this means that these routines only behave identically for non-overlapping inputs. So let's decorate these input vectors with the __restrict modifier, which informs the compiler that there is no overlap. While at it, make the input-only vectors pointer-to-const as well. 
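As a reduced standalone illustration (not kernel code) of what the annotation buys: without __restrict the compiler must assume the stores through p1 may modify what p2 points at, so it either reloads p2[i] every iteration or emits runtime overlap checks before vectorizing; with __restrict it can keep whole blocks in registers and vectorize unconditionally.

	void xor_may_alias(unsigned long bytes, unsigned long *p1,
			   const unsigned long *p2)
	{
		unsigned long i;

		for (i = 0; i < bytes / sizeof(long); i++)
			p1[i] ^= p2[i];	/* p2[i] may have just been written via p1 */
	}

	void xor_no_alias(unsigned long bytes, unsigned long * __restrict p1,
			  const unsigned long * __restrict p2)
	{
		unsigned long i;

		for (i = 0; i < bytes / sizeof(long); i++)
			p1[i] ^= p2[i];	/* safe to load, xor and store whole vectors */
	}
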
Tested-by: Nathan Chancellor Signed-off-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/563 Signed-off-by: Herbert Xu --- include/asm-generic/xor.h | 84 +++++++++++++++++++++++++++++++---------------- include/linux/raid/xor.h | 21 ++++++++---- 2 files changed, 70 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h index b62a2a56a4d4..44509d48fca2 100644 --- a/include/asm-generic/xor.h +++ b/include/asm-generic/xor.h @@ -8,7 +8,8 @@ #include static void -xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8; @@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8; @@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8; @@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8; @@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8; @@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8; @@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8; @@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_32regs_5(unsigned long 
bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8; @@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8 - 1; prefetchw(p1); @@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8 - 1; prefetchw(p1); @@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const 
unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8 - 1; diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 2a9fee8ddae3..51b811b62322 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -11,13 +11,20 @@ struct xor_block_template { struct xor_block_template *next; const char *name; int speed; - void (*do_2)(unsigned long, unsigned long *, unsigned long *); - void (*do_3)(unsigned long, unsigned long *, unsigned long *, - unsigned long *); - void (*do_4)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); - void (*do_5)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); + void (*do_2)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_3)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_4)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_5)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); }; #endif -- cgit v1.2.3 From dc1b4df09acdca7a89806b28f235cd6d8dcd3d24 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Feb 2022 10:19:43 +0000 Subject: atomics: Fix atomic64_{read_acquire,set_release} fallbacks Arnd reports that on 32-bit architectures, the fallbacks for atomic64_read_acquire() and atomic64_set_release() are broken as they use smp_load_acquire() and smp_store_release() respectively, which do not work on types larger than the native word size. Since those contain compiletime_assert_atomic_type(), any attempt to use those fallbacks will result in a build-time error. e.g. with the following added to arch/arm/kernel/setup.c: | void test_atomic64(atomic64_t *v) | { | atomic64_set_release(v, 5); | atomic64_read_acquire(v); | } The compiler will complain as follows: | In file included from : | In function 'arch_atomic64_set_release', | inlined from 'test_atomic64' at ./include/linux/atomic/atomic-instrumented.h:669:2: | ././include/linux/compiler_types.h:346:38: error: call to '__compiletime_assert_9' declared with attribute error: Need native word sized stores/loads for atomicity. 
| 346 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | | ^ | ././include/linux/compiler_types.h:327:4: note: in definition of macro '__compiletime_assert' | 327 | prefix ## suffix(); \ | | ^~~~~~ | ././include/linux/compiler_types.h:346:2: note: in expansion of macro '_compiletime_assert' | 346 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | | ^~~~~~~~~~~~~~~~~~~ | ././include/linux/compiler_types.h:349:2: note: in expansion of macro 'compiletime_assert' | 349 | compiletime_assert(__native_word(t), \ | | ^~~~~~~~~~~~~~~~~~ | ./include/asm-generic/barrier.h:133:2: note: in expansion of macro 'compiletime_assert_atomic_type' | 133 | compiletime_assert_atomic_type(*p); \ | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ./include/asm-generic/barrier.h:164:55: note: in expansion of macro '__smp_store_release' | 164 | #define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) | | ^~~~~~~~~~~~~~~~~~~ | ./include/linux/atomic/atomic-arch-fallback.h:1270:2: note: in expansion of macro 'smp_store_release' | 1270 | smp_store_release(&(v)->counter, i); | | ^~~~~~~~~~~~~~~~~ | make[2]: *** [scripts/Makefile.build:288: arch/arm/kernel/setup.o] Error 1 | make[1]: *** [scripts/Makefile.build:550: arch/arm/kernel] Error 2 | make: *** [Makefile:1831: arch/arm] Error 2 Fix this by only using smp_load_acquire() and smp_store_release() for native atomic types, and otherwise falling back to the regular barriers necessary for acquire/release semantics, as we do in the more generic acquire and release fallbacks. Since the fallback templates are used to generate the atomic64_*() and atomic_*() operations, the __native_word() check is added to both. For the atomic_*() operations, which are always 32-bit, the __native_word() check is redundant but not harmful, as it is always true. For the example above this works as expected on 32-bit, e.g. for arm multi_v7_defconfig: | : | push {r4, r5} | dmb ish | pldw [r0] | mov r2, #5 | mov r3, #0 | ldrexd r4, [r0] | strexd r4, r2, [r0] | teq r4, #0 | bne 484 | ldrexd r2, [r0] | dmb ish | pop {r4, r5} | bx lr ... and also on 64-bit, e.g. 
for arm64 defconfig: | : | bti c | paciasp | mov x1, #0x5 | stlr x1, [x0] | ldar x0, [x0] | autiasp | ret Reported-by: Arnd Bergmann Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ard Biesheuvel Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20220207101943.439825-1-mark.rutland@arm.com --- include/linux/atomic/atomic-arch-fallback.h | 38 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e..6db58d180866 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae -- cgit v1.2.3 From 9983a9d577db415c41099a20a5637ab25dd3c240 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 8 Feb 2022 18:08:02 +0100 Subject: locking/local_lock: Make the empty local_lock_*() function a macro. It has been said that local_lock() does not add any overhead compared to preempt_disable() in a !LOCKDEP configuration. A micro benchmark showed an unexpected result which can be reduced to the fact that local_lock() was not entirely optimized away. In the !LOCKDEP configuration local_lock_acquire() is an empty static inline function. On x86 the this_cpu_ptr() argument of that function is fully evaluated leading to an additional mov+add instructions which are not needed and not used. Replace the static inline function with a macro. The typecheck() macro ensures that the argument is of proper type while the resulting disassembly shows no traces of this_cpu_ptr(). 
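A reduced userspace sketch (not the kernel code) of why the macro form disappears entirely: an empty static inline must still evaluate its argument, while a macro that only hands the expression to typecheck()/typeof() never evaluates it, so the pointer computation can be dropped. Here sizeof stands in for typecheck() and an extern function stands in for this_cpu_ptr():

	extern int *expensive_ptr(void);	/* stand-in for this_cpu_ptr() */

	static inline void acquire_inline(int *l) { }
	#define acquire_macro(l) do { (void)sizeof(*(l)); } while (0)	/* operand of sizeof is not evaluated */

	void demo(void)
	{
		acquire_inline(expensive_ptr());	/* call to expensive_ptr() is still emitted */
		acquire_macro(expensive_ptr());		/* compiles to nothing */
	}
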
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Waiman Long Link: https://lkml.kernel.org/r/YgKjciR60fZft2l4@linutronix.de --- include/linux/local_lock_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 975e33b793a7..6d635e8306d6 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) -static inline void local_lock_acquire(local_lock_t *l) { } -static inline void local_lock_release(local_lock_t *l) { } -static inline void local_lock_debug_init(local_lock_t *l) { } +# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0) #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -- cgit v1.2.3 From 48b6190a00425a1bebac9f7ae4b338a1e20f50f3 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 10 Feb 2022 17:11:36 +0800 Subject: net/smc: Limit SMC visits when handshake workqueue congested This patch intends to provide a mechanism to put constraint on SMC connections visit according to the pressure of SMC handshake process. At present, frequent visits will cause the incoming connections to be backlogged in SMC handshake queue, raise the connections established time. Which is quite unacceptable for those applications who base on short lived connections. There are two ways to implement this mechanism: 1. Put limitation after TCP established. 2. Put limitation before TCP established. In the first way, we need to wait and receive CLC messages that the client will potentially send, and then actively reply with a decline message, in a sense, which is also a sort of SMC handshake, affect the connections established time on its way. In the second way, the only problem is that we need to inject SMC logic into TCP when it is about to reply the incoming SYN, since we already do that, it's seems not a problem anymore. And advantage is obvious, few additional processes are required to complete the constraint. This patch use the second way. After this patch, connections who beyond constraint will not informed any SMC indication, and SMC will not be involved in any of its subsequent processes. Link: https://lore.kernel.org/all/1641301961-59331-1-git-send-email-alibuda@linux.alibaba.com/ Signed-off-by: D. Wythe Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 78b91bb92f0d..1168302b7927 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -394,6 +394,7 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) + bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ #endif -- cgit v1.2.3 From a6a6fe27bab48f0d09a64b051e7bde432fcae081 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Thu, 10 Feb 2022 17:11:37 +0800 Subject: net/smc: Dynamic control handshake limitation by socket options This patch aims to add dynamic control for SMC handshake limitation for every smc sockets, in production environment, it is possible for the same applications to handle different service types, and may have different opinion on SMC handshake limitation. This patch try socket options to complete it, since we don't have socket option level for SMC yet, which requires us to implement it at the same time. This patch does the following: - add new socket option level: SOL_SMC. - add new SMC socket option: SMC_LIMIT_HS. - provide getter/setter for SMC socket options. Link: https://lore.kernel.org/all/20f504f961e1a803f85d64229ad84260434203bd.1644323503.git.alibuda@linux.alibaba.com/ Signed-off-by: D. Wythe Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + include/uapi/linux/smc.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 8ef26d89ef49..6f85f5d957ef 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -366,6 +366,7 @@ struct ucred { #define SOL_XDP 283 #define SOL_MPTCP 284 #define SOL_MCTP 285 +#define SOL_SMC 286 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 6c2874fd2c00..343e7450c3a3 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -284,4 +284,8 @@ enum { __SMC_NLA_SEID_TABLE_MAX, SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; + +/* SMC socket options */ +#define SMC_LIMIT_HS 1 /* constraint on smc handshake */ + #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f9496b7c1b48ce02cd17a3ee88b1e049c689a222 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 10 Feb 2022 17:11:38 +0800 Subject: net/smc: Add global configure for handshake limitation by netlink Although we can control SMC handshake limitation through socket options, which means that applications who need it must modify their code. It's quite troublesome for many existing applications. This patch modifies the global default value of SMC handshake limitation through netlink, providing a way to put constraint on handshake without modifies any code for applications. Suggested-by: Tony Lu Signed-off-by: D. Wythe Reviewed-by: Tony Lu Signed-off-by: David S. 
Miller --- include/net/netns/smc.h | 2 ++ include/uapi/linux/smc.h | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index ea8a9cf2619b..47b166684fd8 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -12,5 +12,7 @@ struct netns_smc { /* protect fback_rsn */ struct mutex mutex_fback_rsn; struct smc_stats_rsn *fback_rsn; + + bool limit_smc_hs; /* constraint on handshake */ }; #endif diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 343e7450c3a3..693f549f6966 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -59,6 +59,9 @@ enum { SMC_NETLINK_DUMP_SEID, SMC_NETLINK_ENABLE_SEID, SMC_NETLINK_DISABLE_SEID, + SMC_NETLINK_DUMP_HS_LIMITATION, + SMC_NETLINK_ENABLE_HS_LIMITATION, + SMC_NETLINK_DISABLE_HS_LIMITATION, }; /* SMC_GENL_FAMILY top level attributes */ @@ -285,6 +288,14 @@ enum { SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; +/* SMC_NETLINK_HS_LIMITATION attributes */ +enum { + SMC_NLA_HS_LIMITATION_UNSPEC, + SMC_NLA_HS_LIMITATION_ENABLED, /* u8 */ + __SMC_NLA_HS_LIMITATION_MAX, + SMC_NLA_HS_LIMITATION_MAX = __SMC_NLA_HS_LIMITATION_MAX - 1 +}; + /* SMC socket options */ #define SMC_LIMIT_HS 1 /* constraint on smc handshake */ -- cgit v1.2.3 From 2d4feb2c1ba728fe53d6f80705b2d7e5c5b666a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Feb 2022 13:42:28 -0800 Subject: ipv6: get rid of net->ipv6.rt6_stats->fib_rt_uncache This counter has never been visible, there is little point trying to maintain it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 40ae8f1b18e5..32c83bb68879 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -367,9 +367,8 @@ struct rt6_statistics { __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ - /* The following stats are not protected by any lock */ + /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ - atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 -- cgit v1.2.3 From bad03efd11dfa6f039fe494207996c482e9364d0 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 9 Feb 2022 07:01:33 -0800 Subject: ALSA: cleanup double word in comment Remove the second 'device'. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220209150133.2291856-1-trix@redhat.com Signed-off-by: Takashi Iwai --- include/sound/hda_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/hda_verbs.h b/include/sound/hda_verbs.h index e36b77531c5c..006d358acce2 100644 --- a/include/sound/hda_verbs.h +++ b/include/sound/hda_verbs.h @@ -461,7 +461,7 @@ enum { #define AC_DE_ELDV (1<<1) #define AC_DE_IA (1<<2) -/* device device types (0x0-0xf) */ +/* device types (0x0-0xf) */ enum { AC_JACK_LINE_OUT, AC_JACK_SPEAKER, -- cgit v1.2.3 From d5869fdc189f0f12a954a48d58a48104a2f5d044 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 10 Feb 2022 14:52:22 -0800 Subject: block: introduce block_rq_error tracepoint Currently, rasdaemon uses the existing tracepoint block_rq_complete and filters out non-error cases in order to capture block disk errors. But there are a few problems with this approach: 1. 
Even kernel trace filter could do the filtering work, there is still some overhead after we enable this tracepoint. 2. The filter is merely based on errno, which does not align with kernel logic to check the errors for print_req_error(). 3. block_rq_complete only provides dev major and minor to identify the block device, it is not convenient to use in user-space. So introduce a new tracepoint block_rq_error just for the error case. With this patch, rasdaemon could switch to block_rq_error. Since the new tracepoint has the similar implementation with block_rq_complete, so move the existing code from TRACE_EVENT block_rq_complete() into new event class block_rq_completion(). Then add event for block_rq_complete and block_rq_err respectively from the newly created event class per the suggestion from Chaitanya Kulkarni. Cc: Jens Axboe Cc: Christoph Hellwig Reviewed-by: Steven Rostedt Signed-off-by: Cong Wang Signed-off-by: Chaitanya Kulkarni Signed-off-by: Yang Shi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220210225222.260069-1-shy828301@gmail.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 49 ++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 27170e40e8c9..7f4dfbdf12a6 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -100,19 +100,7 @@ TRACE_EVENT(block_rq_requeue, __entry->nr_sector, 0) ); -/** - * block_rq_complete - block IO operation completed by device driver - * @rq: block operations request - * @error: status code - * @nr_bytes: number of completed bytes - * - * The block_rq_complete tracepoint event indicates that some portion - * of operation request has been completed by the device driver. If - * the @rq->bio is %NULL, then there is absolutely no additional work to - * do for the request. If @rq->bio is non-NULL then there is - * additional work required to complete the request. - */ -TRACE_EVENT(block_rq_complete, +DECLARE_EVENT_CLASS(block_rq_completion, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), @@ -144,6 +132,41 @@ TRACE_EVENT(block_rq_complete, __entry->nr_sector, __entry->error) ); +/** + * block_rq_complete - block IO operation completed by device driver + * @rq: block operations request + * @error: status code + * @nr_bytes: number of completed bytes + * + * The block_rq_complete tracepoint event indicates that some portion + * of operation request has been completed by the device driver. If + * the @rq->bio is %NULL, then there is absolutely no additional work to + * do for the request. If @rq->bio is non-NULL then there is + * additional work required to complete the request. + */ +DEFINE_EVENT(block_rq_completion, block_rq_complete, + + TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + + TP_ARGS(rq, error, nr_bytes) +); + +/** + * block_rq_error - block IO operation error reported by device driver + * @rq: block operations request + * @error: status code + * @nr_bytes: number of completed bytes + * + * The block_rq_error tracepoint event indicates that some portion + * of operation request has failed as reported by the device driver. 
+ */ +DEFINE_EVENT(block_rq_completion, block_rq_error, + + TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + + TP_ARGS(rq, error, nr_bytes) +); + DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request *rq), -- cgit v1.2.3 From 0e51e2ab49a99bc5077760aa083dfa1c3bf9899b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:47 +0800 Subject: block: remove THROTL_IOPS_MAX No one uses THROTL_IOPS_MAX any more, so remove it. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba5..bdc49bd4eef0 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -28,8 +28,6 @@ /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) -/* Max limits for throttle policy */ -#define THROTL_IOPS_MAX UINT_MAX #define FC_APPID_LEN 129 -- cgit v1.2.3 From 672fdcf0e7de3b1e39416ac85abf178f023271f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:49 +0800 Subject: block: partition include/linux/blk-cgroup.h Partition include/linux/blk-cgroup.h into two parts: one is public part, the other is block layer private part. Suggested by Christoph Hellwig. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 459 +-------------------------------------------- 1 file changed, 5 insertions(+), 454 deletions(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bdc49bd4eef0..f2ad8ed8f777 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -25,12 +25,8 @@ #include #include -/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ -#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) - #define FC_APPID_LEN 129 - #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { @@ -42,6 +38,7 @@ enum blkg_iostat_type { }; struct blkcg_gq; +struct blkg_policy_data; struct blkcg { struct cgroup_subsys_state css; @@ -74,36 +71,6 @@ struct blkg_iostat_set { struct blkg_iostat last; }; -/* - * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a - * request_queue (q). This is used by blkcg policies which need to track - * information per blkcg - q pair. - * - * There can be multiple active blkcg policies and each blkg:policy pair is - * represented by a blkg_policy_data which is allocated and freed by each - * policy's pd_alloc/free_fn() methods. A policy can allocate private data - * area by allocating larger data structure which embeds blkg_policy_data - * at the beginning. - */ -struct blkg_policy_data { - /* the blkg and policy id this per-policy data belongs to */ - struct blkcg_gq *blkg; - int plid; -}; - -/* - * Policies that need to keep per-blkcg data which is independent from any - * request_queue associated to it should implement cpd_alloc/free_fn() - * methods. A policy can allocate private data area by allocating larger - * data structure which embeds blkcg_policy_data at the beginning. - * cpd_init() is invoked to let each policy handle per-blkcg data. 
- */ -struct blkcg_policy_data { - /* the blkcg and policy id this per-policy data belongs to */ - struct blkcg *blkcg; - int plid; -}; - /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ @@ -139,120 +106,17 @@ struct blkcg_gq { struct rcu_head rcu_head; }; -typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); -typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, - struct request_queue *q, struct blkcg *blkcg); -typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, - struct seq_file *s); - -struct blkcg_policy { - int plid; - /* cgroup files for the policy */ - struct cftype *dfl_cftypes; - struct cftype *legacy_cftypes; - - /* operations */ - blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; - blkcg_pol_init_cpd_fn *cpd_init_fn; - blkcg_pol_free_cpd_fn *cpd_free_fn; - blkcg_pol_bind_cpd_fn *cpd_bind_fn; - - blkcg_pol_alloc_pd_fn *pd_alloc_fn; - blkcg_pol_init_pd_fn *pd_init_fn; - blkcg_pol_online_pd_fn *pd_online_fn; - blkcg_pol_offline_pd_fn *pd_offline_fn; - blkcg_pol_free_pd_fn *pd_free_fn; - blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; - blkcg_pol_stat_pd_fn *pd_stat_fn; -}; - -extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; -extern bool blkcg_debug_stats; - -struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, - struct request_queue *q, bool update_hint); -int blkcg_init_queue(struct request_queue *q); -void blkcg_exit_queue(struct request_queue *q); - -/* Blkio controller policy registration */ -int blkcg_policy_register(struct blkcg_policy *pol); -void blkcg_policy_unregister(struct blkcg_policy *pol); -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol); -void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol); - -const char *blkg_dev_name(struct blkcg_gq *blkg); -void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, - struct blkg_policy_data *, int), - const struct blkcg_policy *pol, int data, - bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); - -struct blkg_conf_ctx { - struct block_device *bdev; - struct blkcg_gq *blkg; - char *body; -}; - -struct block_device *blkcg_conf_open_bdev(char **inputp); -int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - char *input, struct blkg_conf_ctx *ctx); -void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. 
- */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} +void blkcg_destroy_blkgs(struct blkcg *blkcg); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -/** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - /** * bio_blkcg - grab the blkcg associated with a bio * @bio: target bio @@ -288,22 +152,6 @@ static inline bool blk_cgroup_congested(void) return ret; } -/** - * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg - * @return: true if this bio needs to be submitted with the root blkg context. - * - * In order to avoid priority inversions we sometimes need to issue a bio as if - * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any - * backcharging to the originating cgroup once the io is complete. - */ -static inline bool bio_issue_as_root_blkg(struct bio *bio) -{ - return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; -} - /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -315,96 +163,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) return css_to_blkcg(blkcg->css.parent); } -/** - * __blkg_lookup - internal version of blkg_lookup() - * @blkcg: blkcg of interest - * @q: request_queue of interest - * @update_hint: whether to update lookup hint with the result or not - * - * This is internal version and shouldn't be used by policy - * implementations. Looks up blkgs for the @blkcg - @q pair regardless of - * @q's bypass state. If @update_hint is %true, the caller should be - * holding @q->queue_lock and lookup hint is updated on success. - */ -static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, - bool update_hint) -{ - struct blkcg_gq *blkg; - - if (blkcg == &blkcg_root) - return q->root_blkg; - - blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->q == q) - return blkg; - - return blkg_lookup_slowpath(blkcg, q, update_hint); -} - -/** - * blkg_lookup - lookup blkg for the specified blkcg - q pair - * @blkcg: blkcg of interest - * @q: request_queue of interest - * - * Lookup blkg for the @blkcg - @q pair. 
This function should be called - * under RCU read lock. - */ -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return __blkg_lookup(blkcg, q, false); -} - -/** - * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair - * @q: request_queue of interest - * - * Lookup blkg for @q at the root level. See also blkg_lookup(). - */ -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ - return q->root_blkg; -} - -/** - * blkg_to_pdata - get policy private data - * @blkg: blkg of interest - * @pol: policy of interest - * - * Return pointer to private data associated with the @blkg-@pol pair. - */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) -{ - return blkg ? blkg->pd[pol->plid] : NULL; -} - -static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, - struct blkcg_policy *pol) -{ - return blkcg ? blkcg->cpd[pol->plid] : NULL; -} - -/** - * pdata_to_blkg - get blkg associated with policy private data - * @pd: policy private data of interest - * - * @pd is policy private data. Determine the blkg it's associated with. - */ -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) -{ - return pd ? pd->blkg : NULL; -} - -static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) -{ - return cpd ? cpd->blkcg : NULL; -} - -extern void blkcg_destroy_blkgs(struct blkcg *blkcg); - /** * blkcg_pin_online - pin online state * @blkcg: blkcg of interest @@ -437,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg) } while (blkcg); } -/** - * blkg_path - format cgroup path of blkg - * @blkg: blkg of interest - * @buf: target buffer - * @buflen: target buffer length - * - * Format the path of the cgroup of @blkg into @buf. - */ -static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) -{ - return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); -} - -/** - * blkg_get - get a blkg reference - * @blkg: blkg to get - * - * The caller should be holding an existing reference. - */ -static inline void blkg_get(struct blkcg_gq *blkg) -{ - percpu_ref_get(&blkg->refcnt); -} - -/** - * blkg_tryget - try and get a blkg reference - * @blkg: blkg to get - * - * This is for use when doing an RCU lookup of the blkg. We may be in the midst - * of freeing this blkg, so we can only use it if the refcnt is not zero. - */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) -{ - return blkg && percpu_ref_tryget(&blkg->refcnt); -} - -/** - * blkg_put - put a blkg reference - * @blkg: blkg to put - */ -static inline void blkg_put(struct blkcg_gq *blkg) -{ - percpu_ref_put(&blkg->refcnt); -} - -/** - * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU - * read locked. If called under either blkcg or queue lock, the iteration - * is guaranteed to include all and only online blkgs. The caller may - * update @pos_css by calling css_rightmost_descendant() to skip subtree. - * @p_blkg is included in the iteration and the first node to be visited. 
- */ -#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -/** - * blkg_for_each_descendant_post - post-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Similar to blkg_for_each_descendant_pre() but performs post-order - * traversal instead. Synchronization rules are the same. @p_blkg is - * included in the iteration and the last node to be visited. - */ -#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -bool __blkcg_punt_bio_submit(struct bio *bio); - -static inline bool blkcg_punt_bio_submit(struct bio *bio) -{ - if (bio->bi_opf & REQ_CGROUP_PUNT) - return __blkcg_punt_bio_submit(bio); - else - return false; -} - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - -static inline void blkcg_use_delay(struct blkcg_gq *blkg) -{ - if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) - return; - if (atomic_add_return(1, &blkg->use_delay) == 1) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); -} - -static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - if (WARN_ON_ONCE(old < 0)) - return 0; - if (old == 0) - return 0; - - /* - * We do this song and dance because we can race with somebody else - * adding or removing delay. If we just did an atomic_dec we'd end up - * negative and we'd already be in trouble. We need to subtract 1 and - * then check to see if we were the last delay so we can drop the - * congestion count on the cgroup. - */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } - - if (old == 0) - return 0; - if (old == 1) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - return 1; -} - -/** - * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount - * @blkg: target blkg - * @delay: delay duration in nsecs - * - * When enabled with this function, the delay is not decayed and must be - * explicitly cleared with blkcg_clear_delay(). Must not be mixed with - * blkcg_[un]use_delay() and blkcg_add_delay() usages. - */ -static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person setting the congestion count for this blkg. */ - if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); - - atomic64_set(&blkg->delay_nsec, delay); -} - -/** - * blkcg_clear_delay - Disable allocator delay mechanism - * @blkg: target blkg - * - * Disable use_delay mechanism. See blkcg_set_delay(). - */ -static inline void blkcg_clear_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person clearing the congestion count for this blkg. 
*/ - if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); -} - -void blk_cgroup_bio_start(struct bio *bio); -void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); -void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { }; -struct blkg_policy_data { -}; - -struct blkcg_policy_data { -}; - struct blkcg_gq { }; -struct blkcg_policy { -}; - #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } #ifdef CONFIG_BLOCK - static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } - -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ return NULL; } -static inline int blkcg_init_queue(struct request_queue *q) { return 0; } -static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } -static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } -static inline int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { return 0; } -static inline void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { } - -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } +#endif /* CONFIG_BLOCK */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } -static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } -static inline void blkg_get(struct blkcg_gq *blkg) { } -static inline void blkg_put(struct blkcg_gq *blkg) { } - -static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } -static inline void blk_cgroup_bio_start(struct bio *bio) { } - -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) - -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_CGROUP_FC_APPID -- cgit v1.2.3 From a8abb0c3dc1e28454851a00f8b7333d9695d566c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 9 Feb 2022 12:33:23 +0530 Subject: bpf: Fix crash due to incorrect copy_map_value When both bpf_spin_lock and bpf_timer are present in a BPF map value, copy_map_value needs to skirt both objects when copying a value into and out of the map. However, the current code does not set both s_off and t_off in copy_map_value, which leads to a crash when e.g. bpf_spin_lock is placed in map value with bpf_timer, as bpf_map_update_elem call will be able to overwrite the other timer object. When the issue is not fixed, an overwriting can produce the following splat: [root@(none) bpf]# ./test_progs -t timer_crash [ 15.930339] bpf_testmod: loading out-of-tree module taints kernel. 
[ 16.037849] ================================================================== [ 16.038458] BUG: KASAN: user-memory-access in __pv_queued_spin_lock_slowpath+0x32b/0x520 [ 16.038944] Write of size 8 at addr 0000000000043ec0 by task test_progs/325 [ 16.039399] [ 16.039514] CPU: 0 PID: 325 Comm: test_progs Tainted: G OE 5.16.0+ #278 [ 16.039983] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014 [ 16.040485] Call Trace: [ 16.040645] [ 16.040805] dump_stack_lvl+0x59/0x73 [ 16.041069] ? __pv_queued_spin_lock_slowpath+0x32b/0x520 [ 16.041427] kasan_report.cold+0x116/0x11b [ 16.041673] ? __pv_queued_spin_lock_slowpath+0x32b/0x520 [ 16.042040] __pv_queued_spin_lock_slowpath+0x32b/0x520 [ 16.042328] ? memcpy+0x39/0x60 [ 16.042552] ? pv_hash+0xd0/0xd0 [ 16.042785] ? lockdep_hardirqs_off+0x95/0xd0 [ 16.043079] __bpf_spin_lock_irqsave+0xdf/0xf0 [ 16.043366] ? bpf_get_current_comm+0x50/0x50 [ 16.043608] ? jhash+0x11a/0x270 [ 16.043848] bpf_timer_cancel+0x34/0xe0 [ 16.044119] bpf_prog_c4ea1c0f7449940d_sys_enter+0x7c/0x81 [ 16.044500] bpf_trampoline_6442477838_0+0x36/0x1000 [ 16.044836] __x64_sys_nanosleep+0x5/0x140 [ 16.045119] do_syscall_64+0x59/0x80 [ 16.045377] ? lock_is_held_type+0xe4/0x140 [ 16.045670] ? irqentry_exit_to_user_mode+0xa/0x40 [ 16.046001] ? mark_held_locks+0x24/0x90 [ 16.046287] ? asm_exc_page_fault+0x1e/0x30 [ 16.046569] ? asm_exc_page_fault+0x8/0x30 [ 16.046851] ? lockdep_hardirqs_on+0x7e/0x100 [ 16.047137] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 16.047405] RIP: 0033:0x7f9e4831718d [ 16.047602] Code: b4 0c 00 0f 05 eb a9 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d b3 6c 0c 00 f7 d8 64 89 01 48 [ 16.048764] RSP: 002b:00007fff488086b8 EFLAGS: 00000206 ORIG_RAX: 0000000000000023 [ 16.049275] RAX: ffffffffffffffda RBX: 00007f9e48683740 RCX: 00007f9e4831718d [ 16.049747] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007fff488086d0 [ 16.050225] RBP: 00007fff488086f0 R08: 00007fff488085d7 R09: 00007f9e4cb594a0 [ 16.050648] R10: 0000000000000000 R11: 0000000000000206 R12: 00007f9e484cde30 [ 16.051124] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 16.051608] [ 16.051762] ================================================================== Fixes: 68134668c17f ("bpf: Add map side support for bpf timers.") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fa517ae604ad..31a83449808b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -224,7 +224,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) if (unlikely(map_value_has_spin_lock(map))) { s_off = map->spin_lock_off; s_sz = sizeof(struct bpf_spin_lock); - } else if (unlikely(map_value_has_timer(map))) { + } + if (unlikely(map_value_has_timer(map))) { t_off = map->timer_off; t_sz = sizeof(struct bpf_timer); } -- cgit v1.2.3 From 5eaed6eedbe9612f642ad2b880f961d1c6c8ec2b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 11 Feb 2022 11:49:53 -0800 Subject: bpf: Fix a bpf_timer initialization issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch in [1] intends to fix a bpf_timer related issue, but the fix caused existing 'timer' 
selftest to fail with hang or some random errors. After some debug, I found an issue with check_and_init_map_value() in the hashtab.c. More specifically, in hashtab.c, we have code l_new = bpf_map_kmalloc_node(&htab->map, ...) check_and_init_map_value(&htab->map, l_new...) Note that bpf_map_kmalloc_node() does not do initialization so l_new contains random value. The function check_and_init_map_value() intends to zero the bpf_spin_lock and bpf_timer if they exist in the map. But I found bpf_spin_lock is zero'ed but bpf_timer is not zero'ed. With [1], later copy_map_value() skips copying of bpf_spin_lock and bpf_timer. The non-zero bpf_timer caused random failures for 'timer' selftest. Without [1], for both bpf_spin_lock and bpf_timer case, bpf_timer will be zero'ed, so 'timer' self test is okay. For check_and_init_map_value(), why bpf_spin_lock is zero'ed properly while bpf_timer not. In bpf uapi header, we have struct bpf_spin_lock { __u32 val; }; struct bpf_timer { __u64 :64; __u64 :64; } __attribute__((aligned(8))); The initialization code: *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = (struct bpf_spin_lock){}; *(struct bpf_timer *)(dst + map->timer_off) = (struct bpf_timer){}; It appears the compiler has no obligation to initialize anonymous fields. For example, let us use clang with bpf target as below: $ cat t.c struct bpf_timer { unsigned long long :64; }; struct bpf_timer2 { unsigned long long a; }; void test(struct bpf_timer *t) { *t = (struct bpf_timer){}; } void test2(struct bpf_timer2 *t) { *t = (struct bpf_timer2){}; } $ clang -target bpf -O2 -c -g t.c $ llvm-objdump -d t.o ... 0000000000000000 : 0: 95 00 00 00 00 00 00 00 exit 0000000000000008 : 1: b7 02 00 00 00 00 00 00 r2 = 0 2: 7b 21 00 00 00 00 00 00 *(u64 *)(r1 + 0) = r2 3: 95 00 00 00 00 00 00 00 exit gcc11.2 does not have the above issue. But from INTERNATIONAL STANDARD ©ISO/IEC ISO/IEC 9899:201x Programming languages — C http://www.open-std.org/Jtc1/sc22/wg14/www/docs/n1547.pdf page 157: Except where explicitly stated otherwise, for the purposes of this subclause unnamed members of objects of structure and union type do not participate in initialization. Unnamed members of structure objects have indeterminate value even after initialization. To fix the problem, let use memset for bpf_timer case in check_and_init_map_value(). For consistency, memset is also used for bpf_spin_lock case. [1] https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com/ Fixes: 68134668c17f3 ("bpf: Add map side support for bpf timers.") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220211194953.3142152-1-yhs@fb.com --- include/linux/bpf.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 31a83449808b..d0ad379d1e62 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,11 +209,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map) static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) - *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = - (struct bpf_spin_lock){}; + memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) - *(struct bpf_timer *)(dst + map->timer_off) = - (struct bpf_timer){}; + memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); } /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. 
*/ -- cgit v1.2.3 From 03e4383c7ce36ac400489c8fe84724470a8251e9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 7 Feb 2022 22:12:31 -0800 Subject: scsi: ibmvscsis: Silence -Warray-bounds warning Instead of doing a cast to storage that is too small, add a union for the high 64 bits. Silences the warnings under -Warray-bounds: drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c: In function 'ibmvscsis_send_messages': drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c:1934:44: error: array subscript 'struct viosrp_crq[0]' is partly outside array bounds of 'u64[1]' {aka 'long long unsigned int[1]'} [-Werror=array-bounds] 1934 | crq->valid = VALID_CMD_RESP_EL; | ^~ drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c:1875:13: note: while referencing 'msg_hi' 1875 | u64 msg_hi = 0; | ^~~~~~ There is no change to the resulting binary instructions. Link: https://lore.kernel.org/lkml/20220125142430.75c3160e@canb.auug.org.au Link: https://lore.kernel.org/r/20220208061231.3429486-1-keescook@chromium.org Cc: Michael Cyr Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Tyrel Datwyler Cc: linux-scsi@vger.kernel.org Cc: target-devel@vger.kernel.org Reported-by: Stephen Rothwell Reviewed-by: Tyrel Datwyler Signed-off-by: Kees Cook Signed-off-by: Martin K. Petersen --- include/scsi/viosrp.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/scsi/viosrp.h b/include/scsi/viosrp.h index c978133c83e3..6c5559d2b285 100644 --- a/include/scsi/viosrp.h +++ b/include/scsi/viosrp.h @@ -70,12 +70,17 @@ enum viosrp_crq_status { }; struct viosrp_crq { - u8 valid; /* used by RPA */ - u8 format; /* SCSI vs out-of-band */ - u8 reserved; - u8 status; /* non-scsi failure? (e.g. DMA failure) */ - __be16 timeout; /* in seconds */ - __be16 IU_length; /* in bytes */ + union { + __be64 high; /* High 64 bits */ + struct { + u8 valid; /* used by RPA */ + u8 format; /* SCSI vs out-of-band */ + u8 reserved; + u8 status; /* non-scsi failure? (e.g. DMA failure) */ + __be16 timeout; /* in seconds */ + __be16 IU_length; /* in bytes */ + }; + }; __be64 IU_data_ptr; /* the TCE for transferring data */ }; -- cgit v1.2.3 From 26fc0ea74fcb9b76b41f5e9b89728cd1c01559cd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 10 Feb 2022 18:43:24 +0800 Subject: scsi: libsas: Drop SAS_TASK_AT_INITIATOR This flag is now only ever set, so delete it. This also avoids a use-after-free in the pm8001 queue path, as reported in the following: https://lore.kernel.org/linux-scsi/c3cb7228-254e-9584-182b-007ac5e6fe0a@huawei.com/T/#m28c94c6d3ff582ec4a9fa54819180740e8bd4cfb https://lore.kernel.org/linux-scsi/0cc0c435-b4f2-9c76-258d-865ba50a29dd@huawei.com/ [mkp: checkpatch + two SAS_TASK_AT_INITIATOR references] Link: https://lore.kernel.org/r/1644489804-85730-3-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 698f2032807b..549232d66b40 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -617,7 +617,6 @@ struct sas_task_slow { #define SAS_TASK_STATE_DONE 2 #define SAS_TASK_STATE_ABORTED 4 #define SAS_TASK_NEED_DEV_RESET 8 -#define SAS_TASK_AT_INITIATOR 16 extern struct sas_task *sas_alloc_task(gfp_t flags); extern struct sas_task *sas_alloc_slow_task(gfp_t flags); -- cgit v1.2.3 From 26d4a969dd0516da2d25e1e4dc5632853c774c17 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Fri, 11 Feb 2022 14:42:55 +0800 Subject: scsi: libsas: Use void for sas_discover_event() return code The callers of function sas_discover_event() do not check its return value. The function also only ever returns 0, so use void instead. Link: https://lore.kernel.org/r/1644561778-183074-2-git-send-email-chenxiang66@hisilicon.com Reviewed-by: John Garry Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 549232d66b40..fad328d3a551 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -684,7 +684,7 @@ int sas_ex_revalidate_domain(struct domain_device *); void sas_unregister_domain_devices(struct asd_sas_port *port, int gone); void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *); -int sas_discover_event(struct asd_sas_port *, enum discover_event ev); +void sas_discover_event(struct asd_sas_port *, enum discover_event ev); int sas_discover_sata(struct domain_device *); int sas_discover_end_dev(struct domain_device *); -- cgit v1.2.3 From 3a20e64281fd481f59c5c188d60632ef1d3264ea Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Fri, 11 Feb 2022 14:42:57 +0800 Subject: scsi: libsas: Remove unused parameter for function sas_ata_eh() Input parameter work_q is not unused in function sas_ata_eh(), so remove it. Link: https://lore.kernel.org/r/1644561778-183074-4-git-send-email-chenxiang66@hisilicon.com Reviewed-by: John Garry Signed-off-by: Xiang Chen Signed-off-by: Martin K. 
Petersen --- include/scsi/sas_ata.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 416c9c47d0e7..21e7c10c6295 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -25,8 +25,7 @@ int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); int sas_ata_init(struct domain_device *dev); void sas_ata_task_abort(struct sas_task *task); void sas_ata_strategy_handler(struct Scsi_Host *shost); -void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, - struct list_head *done_q); +void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); void sas_ata_schedule_reset(struct domain_device *dev); void sas_ata_wait_eh(struct domain_device *dev); void sas_probe_sata(struct asd_sas_port *port); @@ -52,8 +51,7 @@ static inline void sas_ata_strategy_handler(struct Scsi_Host *shost) { } -static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, - struct list_head *done_q) +static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) { } -- cgit v1.2.3 From 23406e4d1f1e4653ec16b546d9480268ef284634 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Fri, 11 Feb 2022 14:42:58 +0800 Subject: scsi: Remove unused member cmd_pool for structure scsi_host_template After commit e9c787e65c0c ("scsi: allocate scsi_cmnd structures as part of struct request"), the member cmd_pool in structure scsi_host_template is not used, so remove it. Link: https://lore.kernel.org/r/1644561778-183074-5-git-send-email-chenxiang66@hisilicon.com Reviewed-by: John Garry Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 72e1a347baa6..667d889b92b5 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -16,7 +16,6 @@ struct completion; struct module; struct scsi_cmnd; struct scsi_device; -struct scsi_host_cmd_pool; struct scsi_target; struct Scsi_Host; struct scsi_transport_template; @@ -493,8 +492,6 @@ struct scsi_host_template { */ u64 vendor_id; - struct scsi_host_cmd_pool *cmd_pool; - /* Delay for runtime autosuspend */ int rpm_autosuspend_delay; }; -- cgit v1.2.3 From e496132ebedd870b67f1f6d2428f9bb9d7ae27fd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 8 Feb 2022 09:43:34 +0000 Subject: sched/fair: Adjust the allowed NUMA imbalance when SD_NUMA spans multiple LLCs Commit 7d2b5dd0bcc4 ("sched/numa: Allow a floating imbalance between NUMA nodes") allowed an imbalance between NUMA nodes such that communicating tasks would not be pulled apart by the load balancer. This works fine when there is a 1:1 relationship between LLC and node but can be suboptimal for multiple LLCs if independent tasks prematurely use CPUs sharing cache. Zen* has multiple LLCs per node with local memory channels and due to the allowed imbalance, it's far harder to tune some workloads to run optimally than it is on hardware that has 1 LLC per node. This patch allows an imbalance to exist up to the point where LLCs should be balanced between nodes. 
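The scheduler-side use of the new field lives in kernel/sched/fair.c and is not part of the include/ diff below; purely as an illustrative sketch (function name and structure assumed, not taken from the patch), sched_domain::imb_numa_nr acts as a threshold below which a NUMA imbalance is tolerated:

    /*
     * Illustrative sketch only -- the real logic is in kernel/sched/fair.c
     * and is not shown in this include/ hunk. Idea: tolerate the imbalance
     * while the destination's running-task count is under the per-domain
     * threshold, so communicating tasks are not pulled apart prematurely.
     */
    static unsigned int sketch_adjust_numa_imbalance(unsigned int imbalance,
                                                     unsigned int dst_running,
                                                     unsigned int imb_numa_nr)
    {
            if (dst_running < imb_numa_nr)
                    return 0;       /* keep related tasks on the same node */
            return imbalance;       /* otherwise balance across LLCs/nodes */
    }
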
On a Zen3 machine running STREAM parallelised with OMP to have on instance per LLC the results and without binding, the results are 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v6 MB/sec copy-16 162596.94 ( 0.00%) 580559.74 ( 257.05%) MB/sec scale-16 136901.28 ( 0.00%) 374450.52 ( 173.52%) MB/sec add-16 157300.70 ( 0.00%) 564113.76 ( 258.62%) MB/sec triad-16 151446.88 ( 0.00%) 564304.24 ( 272.61%) STREAM can use directives to force the spread if the OpenMP is new enough but that doesn't help if an application uses threads and it's not known in advance how many threads will be created. Coremark is a CPU and cache intensive benchmark parallelised with threads. When running with 1 thread per core, the vanilla kernel allows threads to contend on cache. With the patch; 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v5 Min Score-16 368239.36 ( 0.00%) 389816.06 ( 5.86%) Hmean Score-16 388607.33 ( 0.00%) 427877.08 * 10.11%* Max Score-16 408945.69 ( 0.00%) 481022.17 ( 17.62%) Stddev Score-16 15247.04 ( 0.00%) 24966.82 ( -63.75%) CoeffVar Score-16 3.92 ( 0.00%) 5.82 ( -48.48%) It can also make a big difference for semi-realistic workloads like specjbb which can execute arbitrary numbers of threads without advance knowledge of how they should be placed. Even in cases where the average performance is neutral, the results are more stable. 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v6 Hmean tput-1 71631.55 ( 0.00%) 73065.57 ( 2.00%) Hmean tput-8 582758.78 ( 0.00%) 556777.23 ( -4.46%) Hmean tput-16 1020372.75 ( 0.00%) 1009995.26 ( -1.02%) Hmean tput-24 1416430.67 ( 0.00%) 1398700.11 ( -1.25%) Hmean tput-32 1687702.72 ( 0.00%) 1671357.04 ( -0.97%) Hmean tput-40 1798094.90 ( 0.00%) 2015616.46 * 12.10%* Hmean tput-48 1972731.77 ( 0.00%) 2333233.72 ( 18.27%) Hmean tput-56 2386872.38 ( 0.00%) 2759483.38 ( 15.61%) Hmean tput-64 2909475.33 ( 0.00%) 2925074.69 ( 0.54%) Hmean tput-72 2585071.36 ( 0.00%) 2962443.97 ( 14.60%) Hmean tput-80 2994387.24 ( 0.00%) 3015980.59 ( 0.72%) Hmean tput-88 3061408.57 ( 0.00%) 3010296.16 ( -1.67%) Hmean tput-96 3052394.82 ( 0.00%) 2784743.41 ( -8.77%) Hmean tput-104 2997814.76 ( 0.00%) 2758184.50 ( -7.99%) Hmean tput-112 2955353.29 ( 0.00%) 2859705.09 ( -3.24%) Hmean tput-120 2889770.71 ( 0.00%) 2764478.46 ( -4.34%) Hmean tput-128 2871713.84 ( 0.00%) 2750136.73 ( -4.23%) Stddev tput-1 5325.93 ( 0.00%) 2002.53 ( 62.40%) Stddev tput-8 6630.54 ( 0.00%) 10905.00 ( -64.47%) Stddev tput-16 25608.58 ( 0.00%) 6851.16 ( 73.25%) Stddev tput-24 12117.69 ( 0.00%) 4227.79 ( 65.11%) Stddev tput-32 27577.16 ( 0.00%) 8761.05 ( 68.23%) Stddev tput-40 59505.86 ( 0.00%) 2048.49 ( 96.56%) Stddev tput-48 168330.30 ( 0.00%) 93058.08 ( 44.72%) Stddev tput-56 219540.39 ( 0.00%) 30687.02 ( 86.02%) Stddev tput-64 121750.35 ( 0.00%) 9617.36 ( 92.10%) Stddev tput-72 223387.05 ( 0.00%) 34081.13 ( 84.74%) Stddev tput-80 128198.46 ( 0.00%) 22565.19 ( 82.40%) Stddev tput-88 136665.36 ( 0.00%) 27905.97 ( 79.58%) Stddev tput-96 111925.81 ( 0.00%) 99615.79 ( 11.00%) Stddev tput-104 146455.96 ( 0.00%) 28861.98 ( 80.29%) Stddev tput-112 88740.49 ( 0.00%) 58288.23 ( 34.32%) Stddev tput-120 186384.86 ( 0.00%) 45812.03 ( 75.42%) Stddev tput-128 78761.09 ( 0.00%) 57418.48 ( 27.10%) Similarly, for embarassingly parallel problems like NPB-ep, there are improvements due to better spreading across LLC when the machine is not fully utilised. 
vanilla sched-numaimb-v6 Min ep.D 31.79 ( 0.00%) 26.11 ( 17.87%) Amean ep.D 31.86 ( 0.00%) 26.17 * 17.86%* Stddev ep.D 0.07 ( 0.00%) 0.05 ( 24.41%) CoeffVar ep.D 0.22 ( 0.00%) 0.20 ( 7.97%) Max ep.D 31.93 ( 0.00%) 26.21 ( 17.91%) Signed-off-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gautham R. Shenoy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20220208094334.16379-3-mgorman@techsingularity.net --- include/linux/sched/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8054641c0a7b..56cffe42abbc 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -93,6 +93,7 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int imb_numa_nr; /* Nr running tasks that allows a NUMA imbalance */ int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ -- cgit v1.2.3 From 0764db9b49c932b89ee4d9e3236dff4bb07b4a66 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 11 Feb 2022 16:32:32 -0800 Subject: mm: memcg: synchronize objcg lists with a dedicated spinlock Alexander reported a circular lock dependency revealed by the mmap1 ltp test: LOCKDEP_CIRCULAR (suite: ltp, case: mtest06 (mmap1)) WARNING: possible circular locking dependency detected 5.17.0-20220113.rc0.git0.f2211f194038.300.fc35.s390x+debug #1 Not tainted ------------------------------------------------------ mmap1/202299 is trying to acquire lock: 00000001892c0188 (css_set_lock){..-.}-{2:2}, at: obj_cgroup_release+0x4a/0xe0 but task is already holding lock: 00000000ca3b3818 (&sighand->siglock){-.-.}-{2:2}, at: force_sig_info_to_task+0x38/0x180 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&sighand->siglock){-.-.}-{2:2}: __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 __lock_task_sighand+0x90/0x190 cgroup_freeze_task+0x2e/0x90 cgroup_migrate_execute+0x11c/0x608 cgroup_update_dfl_csses+0x246/0x270 cgroup_subtree_control_write+0x238/0x518 kernfs_fop_write_iter+0x13e/0x1e0 new_sync_write+0x100/0x190 vfs_write+0x22c/0x2d8 ksys_write+0x6c/0xf8 __do_syscall+0x1da/0x208 system_call+0x82/0xb0 -> #0 (css_set_lock){..-.}-{2:2}: check_prev_add+0xe0/0xed8 validate_chain+0x736/0xb20 __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 obj_cgroup_release+0x4a/0xe0 percpu_ref_put_many.constprop.0+0x150/0x168 drain_obj_stock+0x94/0xe8 refill_obj_stock+0x94/0x278 obj_cgroup_charge+0x164/0x1d8 kmem_cache_alloc+0xac/0x528 __sigqueue_alloc+0x150/0x308 __send_signal+0x260/0x550 send_signal+0x7e/0x348 force_sig_info_to_task+0x104/0x180 force_sig_fault+0x48/0x58 __do_pgm_check+0x120/0x1f0 pgm_check_handler+0x11e/0x180 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sighand->siglock); lock(css_set_lock); lock(&sighand->siglock); lock(css_set_lock); *** DEADLOCK *** 2 locks held by mmap1/202299: #0: 00000000ca3b3818 (&sighand->siglock){-.-.}-{2:2}, at: force_sig_info_to_task+0x38/0x180 #1: 00000001892ad560 (rcu_read_lock){....}-{1:2}, at: percpu_ref_put_many.constprop.0+0x0/0x168 stack backtrace: CPU: 15 PID: 202299 Comm: mmap1 Not tainted 5.17.0-20220113.rc0.git0.f2211f194038.300.fc35.s390x+debug #1 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: dump_stack_lvl+0x76/0x98 check_noncircular+0x136/0x158 check_prev_add+0xe0/0xed8 validate_chain+0x736/0xb20 __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 obj_cgroup_release+0x4a/0xe0 percpu_ref_put_many.constprop.0+0x150/0x168 drain_obj_stock+0x94/0xe8 refill_obj_stock+0x94/0x278 obj_cgroup_charge+0x164/0x1d8 kmem_cache_alloc+0xac/0x528 __sigqueue_alloc+0x150/0x308 __send_signal+0x260/0x550 send_signal+0x7e/0x348 force_sig_info_to_task+0x104/0x180 force_sig_fault+0x48/0x58 __do_pgm_check+0x120/0x1f0 pgm_check_handler+0x11e/0x180 INFO: lockdep is turned off. In this example a slab allocation from __send_signal() caused a refilling and draining of a percpu objcg stock, resulted in a releasing of another non-related objcg. Objcg release path requires taking the css_set_lock, which is used to synchronize objcg lists. This can create a circular dependency with the sighandler lock, which is taken with the locked css_set_lock by the freezer code (to freeze a task). In general it seems that using css_set_lock to synchronize objcg lists makes any slab allocations and deallocation with the locked css_set_lock and any intervened locks risky. To fix the problem and make the code more robust let's stop using css_set_lock to synchronize objcg lists and use a new dedicated spinlock instead. 
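Only the header annotations are part of the diff below; as a hedged sketch of what the mm/memcontrol.c side of such a change could look like (the lock name objcg_lock matches the new comments, the function and list handling here are assumptions for illustration):

    /* Sketch, not the actual mm/memcontrol.c hunk from this patch. */
    static DEFINE_SPINLOCK(objcg_lock);

    static void sketch_objcg_link(struct obj_cgroup *objcg,
                                  struct mem_cgroup *memcg)
    {
            unsigned long flags;

            /* objcg lists are serialized by objcg_lock, not css_set_lock */
            spin_lock_irqsave(&objcg_lock, flags);
            list_add(&objcg->list, &memcg->objcg_list);
            spin_unlock_irqrestore(&objcg_lock, flags);
    }
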
Link: https://lkml.kernel.org/r/Yfm1IHmoGdyUR81T@carbon.dhcp.thefacebook.com Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API") Signed-off-by: Roman Gushchin Reported-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Reviewed-by: Waiman Long Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Reviewed-by: Jeremy Linton Tested-by: Jeremy Linton Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b72d75141e12..0abbd685703b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -219,7 +219,7 @@ struct obj_cgroup { struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { - struct list_head list; + struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; @@ -315,7 +315,8 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; struct obj_cgroup __rcu *objcg; - struct list_head objcg_list; /* list of inherited objcgs */ + /* list of inherited objcgs, protected by objcg_lock */ + struct list_head objcg_list; #endif MEMCG_PADDING(_pad2_); -- cgit v1.2.3 From 8913c61001482378d4ed8cc577b17c1ba3e847e4 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Fri, 11 Feb 2022 16:32:35 -0800 Subject: kfence: make test case compatible with run time set sample interval The parameter kfence_sample_interval can be set via boot parameter and late shell command, which is convenient for automated tests and KFENCE parameter optimization. However, KFENCE test case just uses compile-time CONFIG_KFENCE_SAMPLE_INTERVAL, which will make KFENCE test case not run as users desired. Export kfence_sample_interval, so that KFENCE test case can use run-time-set sample interval. Link: https://lkml.kernel.org/r/20220207034432.185532-1-liupeng256@huawei.com Signed-off-by: Peng Liu Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Sumit Semwal Cc: Christian Knig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfence.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 4b5e3679a72c..f49e64222628 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -17,6 +17,8 @@ #include #include +extern unsigned long kfence_sample_interval; + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an -- cgit v1.2.3 From 6d240170811aad7330e6d0b3857fb0d4d9c82b56 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 7 Feb 2022 10:05:40 +0800 Subject: firmware: imx: add get resource owner api Add resource owner management API, this API could be used to check whether M4 is under control of Linux. 
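A minimal usage sketch of the new call follows; the wrapper function, the resource/partition parameters and the ownership comparison are illustrative assumptions, only the imx_sc_rm_get_resource_owner() prototype comes from the patch:

    /* Sketch: ask the SCU which partition owns a resource (e.g. the M4). */
    static bool sketch_resource_owned_by(struct imx_sc_ipc *ipc, u16 resource,
                                         u8 partition)
    {
            u8 pt;

            /* returns -EOPNOTSUPP when CONFIG_IMX_SCU is not enabled */
            if (imx_sc_rm_get_resource_owner(ipc, resource, &pt))
                    return false;

            return pt == partition;
    }
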
Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/svc/rm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/svc/rm.h b/include/linux/firmware/imx/svc/rm.h index 456b6a59d29b..31456f897aa9 100644 --- a/include/linux/firmware/imx/svc/rm.h +++ b/include/linux/firmware/imx/svc/rm.h @@ -59,11 +59,16 @@ enum imx_sc_rm_func { #if IS_ENABLED(CONFIG_IMX_SCU) bool imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource); +int imx_sc_rm_get_resource_owner(struct imx_sc_ipc *ipc, u16 resource, u8 *pt); #else static inline bool imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource) { return true; } +static inline int imx_sc_rm_get_resource_owner(struct imx_sc_ipc *ipc, u16 resource, u8 *pt) +{ + return -EOPNOTSUPP; +} #endif #endif -- cgit v1.2.3 From 3e96dcfb96e80d2f7f1edb6a1ac81b12de996fa8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 23:32:27 +0100 Subject: ARM: ixp4xx: Delete the Goramo MLR boardfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This board is replaced with the corresponding device tree. Also delete dangling platform data file only used by this boardfile and nothing else. Cc: Krzysztof Hałasa Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220211223238.648934-3-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/platform_data/wan_ixp4xx_hss.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/linux/platform_data/wan_ixp4xx_hss.h (limited to 'include') diff --git a/include/linux/platform_data/wan_ixp4xx_hss.h b/include/linux/platform_data/wan_ixp4xx_hss.h deleted file mode 100644 index d525a0feb9e1..000000000000 --- a/include/linux/platform_data/wan_ixp4xx_hss.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_WAN_IXP4XX_HSS_H -#define __PLATFORM_DATA_WAN_IXP4XX_HSS_H - -#include - -/* Information about built-in HSS (synchronous serial) interfaces */ -struct hss_plat_info { - int (*set_clock)(int port, unsigned int clock_type); - int (*open)(int port, void *pdev, - void (*set_carrier_cb)(void *pdev, int carrier)); - void (*close)(int port, void *pdev); - u8 txreadyq; - u32 timer_freq; -}; - -#endif -- cgit v1.2.3 From b50113cbdd1340c31e85e4cfc5f5e81ce9cbb2aa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 23:32:31 +0100 Subject: soc: ixp4xx: Add features from regmap helper If we want to read the CFG2 register on the expansion bus and apply the inversion and check for some hardcoded versions this helper comes in handy. 
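For reference, a caller is expected to hand the helper the expansion bus syscon regmap, roughly as in the sketch below; the compatible string and the error handling are assumptions for illustration, while cpu_ixp4xx_features() and IXP4XX_FEATURE_AES come from the header change itself:

    /* Sketch of a probe-time feature query through the new helper. */
    struct regmap *rmap;
    u32 features;

    rmap = syscon_regmap_lookup_by_compatible("intel,ixp4xx-expansion-bus-controller");
    if (IS_ERR(rmap))
            return PTR_ERR(rmap);   /* may be -EPROBE_DEFER */

    features = cpu_ixp4xx_features(rmap);
    if (!(features & IXP4XX_FEATURE_AES))
            return -ENODEV;
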
Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220211223238.648934-7-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/soc/ixp4xx/cpu.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ixp4xx/cpu.h b/include/linux/soc/ixp4xx/cpu.h index 88bd8de0e803..48c2e241ac83 100644 --- a/include/linux/soc/ixp4xx/cpu.h +++ b/include/linux/soc/ixp4xx/cpu.h @@ -9,6 +9,7 @@ #define __SOC_IXP4XX_CPU_H__ #include +#include #ifdef CONFIG_ARM #include #endif @@ -23,6 +24,9 @@ #define IXP46X_PROCESSOR_ID_VALUE 0x69054200 /* including IXP455 */ #define IXP46X_PROCESSOR_ID_MASK 0xfffffff0 +/* Feature register in the expansion bus controller */ +#define IXP4XX_EXP_CNFG2 0x2c + /* "fuse" bits of IXP_EXP_CFG2 */ /* All IXP4xx CPUs */ #define IXP4XX_FEATURE_RCOMP (1 << 0) @@ -89,6 +93,22 @@ u32 ixp4xx_read_feature_bits(void); void ixp4xx_write_feature_bits(u32 value); +static inline u32 cpu_ixp4xx_features(struct regmap *rmap) +{ + u32 val; + + regmap_read(rmap, IXP4XX_EXP_CNFG2, &val); + /* For some reason this register is inverted */ + val = ~val; + if (cpu_is_ixp42x_rev_a0()) + return IXP42X_FEATURE_MASK & ~(IXP4XX_FEATURE_RCOMP | + IXP4XX_FEATURE_AES); + if (cpu_is_ixp42x()) + return val & IXP42X_FEATURE_MASK; + if (cpu_is_ixp43x()) + return val & IXP43X_FEATURE_MASK; + return val & IXP46X_FEATURE_MASK; +} #else #define cpu_is_ixp42x_rev_a0() 0 #define cpu_is_ixp42x() 0 @@ -101,6 +121,10 @@ static inline u32 ixp4xx_read_feature_bits(void) static inline void ixp4xx_write_feature_bits(u32 value) { } +static inline u32 cpu_ixp4xx_features(struct regmap *rmap) +{ + return 0; +} #endif #endif /* _ASM_ARCH_CPU_H */ -- cgit v1.2.3 From 8754a7e61c766fbc533c627b56ff181550dca00e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 23:32:32 +0100 Subject: soc: ixp4xx-npe: Access syscon regs using regmap If we access the syscon (expansion bus config registers) using the syscon regmap instead of relying on direct accessor functions, we do not need to call this static code in the machine (arch/arm/mach-ixp4xx/common.c) which makes things less dependent on custom machine-dependent code. Look up the syscon regmap and handle the error: this will make deferred probe work with relation to the syscon. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220211223238.648934-8-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/soc/ixp4xx/npe.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ixp4xx/npe.h b/include/linux/soc/ixp4xx/npe.h index 2a91f465d456..9efeac777da1 100644 --- a/include/linux/soc/ixp4xx/npe.h +++ b/include/linux/soc/ixp4xx/npe.h @@ -3,6 +3,7 @@ #define __IXP4XX_NPE_H #include +#include extern const char *npe_names[]; @@ -17,6 +18,7 @@ struct npe_regs { struct npe { struct npe_regs __iomem *regs; + struct regmap *rmap; int id; int valid; }; -- cgit v1.2.3 From c8200f4e7267545a384fb86a4630f76958ab9df6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 23:32:33 +0100 Subject: net: ixp4xx_eth: Drop platform data support All IXP4xx platforms are converted to device tree, the platform data path is no longer used. Drop the code and custom include, confine the driver in its own file. Depend on OF and remove ifdefs around this, as we are all probing from OF now. Cc: David S. 
Miller Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: Linus Walleij Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220211223238.648934-9-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/platform_data/eth_ixp4xx.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/platform_data/eth_ixp4xx.h (limited to 'include') diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h deleted file mode 100644 index 114b0940729f..000000000000 --- a/include/linux/platform_data/eth_ixp4xx.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_ETH_IXP4XX -#define __PLATFORM_DATA_ETH_IXP4XX - -#include - -#define IXP4XX_ETH_NPEA 0x00 -#define IXP4XX_ETH_NPEB 0x10 -#define IXP4XX_ETH_NPEC 0x20 - -/* Information about built-in Ethernet MAC interfaces */ -struct eth_plat_info { - u8 phy; /* MII PHY ID, 0 - 31 */ - u8 rxq; /* configurable, currently 0 - 31 only */ - u8 txreadyq; - u8 hwaddr[6]; - u8 npe; /* NPE instance used by this interface */ - bool has_mdio; /* If this instance has an MDIO bus */ -}; - -#endif -- cgit v1.2.3 From 3059dfa52c07a9b6d770e87dc21b68f4295239c5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 23:32:35 +0100 Subject: ARM: ixp4xx: Remove feature bit accessors We switched users of the accessors over to using syscon to inspect the bits, or removed the need for checking them. Delete these accessors. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220211223238.648934-11-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/soc/ixp4xx/cpu.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/soc/ixp4xx/cpu.h b/include/linux/soc/ixp4xx/cpu.h index 48c2e241ac83..f526ac33afea 100644 --- a/include/linux/soc/ixp4xx/cpu.h +++ b/include/linux/soc/ixp4xx/cpu.h @@ -90,9 +90,6 @@ IXP43X_PROCESSOR_ID_VALUE) #define cpu_is_ixp46x() ((read_cpuid_id() & IXP46X_PROCESSOR_ID_MASK) == \ IXP46X_PROCESSOR_ID_VALUE) - -u32 ixp4xx_read_feature_bits(void); -void ixp4xx_write_feature_bits(u32 value); static inline u32 cpu_ixp4xx_features(struct regmap *rmap) { u32 val; @@ -114,13 +111,6 @@ static inline u32 cpu_ixp4xx_features(struct regmap *rmap) #define cpu_is_ixp42x() 0 #define cpu_is_ixp43x() 0 #define cpu_is_ixp46x() 0 -static inline u32 ixp4xx_read_feature_bits(void) -{ - return 0; -} -static inline void ixp4xx_write_feature_bits(u32 value) -{ -} static inline u32 cpu_ixp4xx_features(struct regmap *rmap) { return 0; -- cgit v1.2.3 From f5c54f77b07b278cfde4a654e111c39996ac8b5b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 4 Feb 2022 09:30:13 +0100 Subject: cpumask: Add a x86-specific cpumask_clear_cpu() helper Add a x86-specific cpumask_clear_cpu() helper which will be used in places where the explicit KASAN-instrumentation in the *_bit() helpers is unwanted. Also, always inline two more cpumask generic helpers. 
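The arch-specific helper itself is outside this include/linux diff; conceptually (sketch only, assuming the usual non-instrumented arch_*_bit() primitives) an override of this kind boils down to:

    /* Sketch of an arch override that avoids KASAN bit-op instrumentation. */
    static __always_inline void arch_cpumask_clear_cpu(int cpu,
                                                       struct cpumask *dstp)
    {
            arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
    }
    #define cpumask_clear_cpu arch_cpumask_clear_cpu
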
allyesconfig: text data bss dec hex filename 190553143 159425889 32076404 382055436 16c5b40c vmlinux.before 190551812 159424945 32076404 382053161 16c5ab29 vmlinux.after Signed-off-by: Borislav Petkov Acked-by: Marco Elver Link: https://lore.kernel.org/r/20220204083015.17317-2-bp@alien8.de --- include/linux/cpumask.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 6b06c698cd2a..fe29ac7cc469 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus; extern cpumask_t cpus_booted_once_mask; -static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) +static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); @@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) } /* verify cpu argument to cpumask_* operators */ -static inline unsigned int cpumask_check(unsigned int cpu) +static __always_inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; -- cgit v1.2.3 From 39d01d9c4522c4ac9e0f041eaaf7cee670a6a574 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 7 Feb 2022 20:25:40 +0100 Subject: dt-bindings: power: add defines for i.MX8MP power domain This adds the DT defines for the GPC power domains found on the i.MX8MP SoC. Signed-off-by: Lucas Stach Acked-by: Rob Herring Signed-off-by: Shawn Guo --- include/dt-bindings/power/imx8mp-power.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/dt-bindings/power/imx8mp-power.h (limited to 'include') diff --git a/include/dt-bindings/power/imx8mp-power.h b/include/dt-bindings/power/imx8mp-power.h new file mode 100644 index 000000000000..7c67689e4faf --- /dev/null +++ b/include/dt-bindings/power/imx8mp-power.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Copyright (C) 2020 Pengutronix, Sascha Hauer + */ + +#ifndef __DT_BINDINGS_IMX8MP_POWER_DOMAIN_POWER_H__ +#define __DT_BINDINGS_IMX8MP_POWER_DOMAIN_POWER_H__ + +#define IMX8MP_POWER_DOMAIN_MIPI_PHY1 0 +#define IMX8MP_POWER_DOMAIN_PCIE_PHY 1 +#define IMX8MP_POWER_DOMAIN_USB1_PHY 2 +#define IMX8MP_POWER_DOMAIN_USB2_PHY 3 +#define IMX8MP_POWER_DOMAIN_MLMIX 4 +#define IMX8MP_POWER_DOMAIN_AUDIOMIX 5 +#define IMX8MP_POWER_DOMAIN_GPU2D 6 +#define IMX8MP_POWER_DOMAIN_GPUMIX 7 +#define IMX8MP_POWER_DOMAIN_VPUMIX 8 +#define IMX8MP_POWER_DOMAIN_GPU3D 9 +#define IMX8MP_POWER_DOMAIN_MEDIAMIX 10 +#define IMX8MP_POWER_DOMAIN_VPU_G1 11 +#define IMX8MP_POWER_DOMAIN_VPU_G2 12 +#define IMX8MP_POWER_DOMAIN_VPU_VC8000E 13 +#define IMX8MP_POWER_DOMAIN_HDMIMIX 14 +#define IMX8MP_POWER_DOMAIN_HDMI_PHY 15 +#define IMX8MP_POWER_DOMAIN_MIPI_PHY2 16 +#define IMX8MP_POWER_DOMAIN_HSIOMIX 17 +#define IMX8MP_POWER_DOMAIN_MEDIAMIX_ISPDWP 18 + +#endif -- cgit v1.2.3 From 38294f6158b8f8af8afa82f98ca9db3a8021f020 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 7 Feb 2022 20:25:42 +0100 Subject: dt-bindings: power: imx8mp: add defines for HSIO blk-ctrl domains This adds the defines for the power domains provided by the HSIO blk-ctrl on the i.MX8MP. 
Signed-off-by: Lucas Stach Acked-by: Rob Herring Signed-off-by: Shawn Guo --- include/dt-bindings/power/imx8mp-power.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/imx8mp-power.h b/include/dt-bindings/power/imx8mp-power.h index 7c67689e4faf..9f90c40a2c6c 100644 --- a/include/dt-bindings/power/imx8mp-power.h +++ b/include/dt-bindings/power/imx8mp-power.h @@ -26,4 +26,10 @@ #define IMX8MP_POWER_DOMAIN_HSIOMIX 17 #define IMX8MP_POWER_DOMAIN_MEDIAMIX_ISPDWP 18 +#define IMX8MP_HSIOBLK_PD_USB 0 +#define IMX8MP_HSIOBLK_PD_USB_PHY1 1 +#define IMX8MP_HSIOBLK_PD_USB_PHY2 2 +#define IMX8MP_HSIOBLK_PD_PCIE 3 +#define IMX8MP_HSIOBLK_PD_PCIE_PHY 4 + #endif -- cgit v1.2.3 From 2618a0dae09ef37728dab89ff60418cbe25ae6bd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Feb 2022 09:14:49 -0800 Subject: etherdevice: Adjust ether_addr* prototypes to silence -Wstringop-overead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GCC 12, -Wstringop-overread was warning about an implicit cast from char[6] to char[8]. However, the extra 2 bytes are always thrown away, alignment doesn't matter, and the risk of hitting the edge of unallocated memory has been accepted, so this prototype can just be converted to a regular char *. Silences: net/core/dev.c: In function ‘bpf_prog_run_generic_xdp’: net/core/dev.c:4618:21: warning: ‘ether_addr_equal_64bits’ reading 8 bytes from a region of size 6 [-Wstringop-overread] 4618 | orig_host = ether_addr_equal_64bits(eth->h_dest, > skb->dev->dev_addr); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/dev.c:4618:21: note: referencing argument 1 of type ‘const u8[8]’ {aka ‘const unsigned char[8]’} net/core/dev.c:4618:21: note: referencing argument 2 of type ‘const u8[8]’ {aka ‘const unsigned char[8]’} In file included from net/core/dev.c:91: include/linux/etherdevice.h:375:20: note: in a call to function ‘ether_addr_equal_64bits’ 375 | static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], | ^~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Marc Kleine-Budde Tested-by: Marc Kleine-Budde Link: https://lore.kernel.org/netdev/20220212090811.uuzk6d76agw2vv73@pengutronix.de Cc: Jakub Kicinski Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2ad71cc90b37..92b10e67d5f8 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -134,7 +134,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) #endif } -static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +static inline bool is_multicast_ether_addr_64bits(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN @@ -372,8 +372,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. 
*/ -static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], - const u8 addr2[6+2]) +static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); -- cgit v1.2.3 From 845301001308aab8fb7902548f6c3256d28b8c48 Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Wed, 26 Jan 2022 10:04:10 -0800 Subject: power: supply: PCHG: Use MKBP for device event handling This change makes the PCHG driver receive device events through MKBP protocol since CrOS EC switched to deliver all peripheral charge events to the MKBP protocol. This will unify PCHG event handling on X86 and ARM. Signed-off-by: Daisuke Nojiri Signed-off-by: Sebastian Reichel --- include/linux/platform_data/cros_ec_commands.h | 64 ++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 271bd87bff0a..95e7e5667291 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3386,6 +3386,9 @@ enum ec_mkbp_event { /* Send an incoming CEC message to the AP */ EC_MKBP_EVENT_CEC_MESSAGE = 9, + /* Peripheral device charger event */ + EC_MKBP_EVENT_PCHG = 12, + /* Number of MKBP events */ EC_MKBP_EVENT_COUNT, }; @@ -5527,6 +5530,67 @@ enum pchg_state { [PCHG_STATE_CONNECTED] = "CONNECTED", \ } +/* + * Update firmware of peripheral chip + */ +#define EC_CMD_PCHG_UPDATE 0x0136 + +/* Port number is encoded in bit[28:31]. */ +#define EC_MKBP_PCHG_PORT_SHIFT 28 +/* Utility macro for converting MKBP event to port number. */ +#define EC_MKBP_PCHG_EVENT_TO_PORT(e) (((e) >> EC_MKBP_PCHG_PORT_SHIFT) & 0xf) +/* Utility macro for extracting event bits. */ +#define EC_MKBP_PCHG_EVENT_MASK(e) ((e) \ + & GENMASK(EC_MKBP_PCHG_PORT_SHIFT-1, 0)) + +#define EC_MKBP_PCHG_UPDATE_OPENED BIT(0) +#define EC_MKBP_PCHG_WRITE_COMPLETE BIT(1) +#define EC_MKBP_PCHG_UPDATE_CLOSED BIT(2) +#define EC_MKBP_PCHG_UPDATE_ERROR BIT(3) +#define EC_MKBP_PCHG_DEVICE_EVENT BIT(4) + +enum ec_pchg_update_cmd { + /* Reset chip to normal mode. */ + EC_PCHG_UPDATE_CMD_RESET_TO_NORMAL = 0, + /* Reset and put a chip in update (a.k.a. download) mode. */ + EC_PCHG_UPDATE_CMD_OPEN, + /* Write a block of data containing FW image. */ + EC_PCHG_UPDATE_CMD_WRITE, + /* Close update session. 
*/ + EC_PCHG_UPDATE_CMD_CLOSE, + /* End of commands */ + EC_PCHG_UPDATE_CMD_COUNT, +}; + +struct ec_params_pchg_update { + /* PCHG port number */ + uint8_t port; + /* enum ec_pchg_update_cmd */ + uint8_t cmd; + /* Padding */ + uint8_t reserved0; + uint8_t reserved1; + /* Version of new firmware */ + uint32_t version; + /* CRC32 of new firmware */ + uint32_t crc32; + /* Address in chip memory where is written to */ + uint32_t addr; + /* Size of */ + uint32_t size; + /* Partial data of new firmware */ + uint8_t data[]; +} __ec_align4; + +BUILD_ASSERT(EC_PCHG_UPDATE_CMD_COUNT + < BIT(sizeof(((struct ec_params_pchg_update *)0)->cmd)*8)); + +struct ec_response_pchg_update { + /* Block size */ + uint32_t block_size; +} __ec_align4; + + /*****************************************************************************/ /* Voltage regulator controls */ -- cgit v1.2.3 From f68f2ff91512c199ec24883001245912afc17873 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 Apr 2021 23:22:52 -0700 Subject: fortify: Detect struct member overflows in memcpy() at compile-time memcpy() is dead; long live memcpy() tl;dr: In order to eliminate a large class of common buffer overflow flaws that continue to persist in the kernel, have memcpy() (under CONFIG_FORTIFY_SOURCE) perform bounds checking of the destination struct member when they have a known size. This would have caught all of the memcpy()-related buffer write overflow flaws identified in at least the last three years. Background and analysis: While stack-based buffer overflow flaws are largely mitigated by stack canaries (and similar) features, heap-based buffer overflow flaws continue to regularly appear in the kernel. Many classes of heap buffer overflows are mitigated by FORTIFY_SOURCE when using the strcpy() family of functions, but a significant number remain exposed through the memcpy() family of functions. At its core, FORTIFY_SOURCE uses the compiler's __builtin_object_size() internal[0] to determine the available size at a target address based on the compile-time known structure layout details. It operates in two modes: outer bounds (0) and inner bounds (1). In mode 0, the size of the enclosing structure is used. In mode 1, the size of the specific field is used. For example: struct object { u16 scalar1; /* 2 bytes */ char array[6]; /* 6 bytes */ u64 scalar2; /* 8 bytes */ u32 scalar3; /* 4 bytes */ u32 scalar4; /* 4 bytes */ } instance; __builtin_object_size(instance.array, 0) == 22, since the remaining size of the enclosing structure starting from "array" is 22 bytes (6 + 8 + 4 + 4). __builtin_object_size(instance.array, 1) == 6, since the remaining size of the specific field "array" is 6 bytes. The initial implementation of FORTIFY_SOURCE used mode 0 because there were many cases of both strcpy() and memcpy() functions being used to write (or read) across multiple fields in a structure. For example, it would catch this, which is writing 2 bytes beyond the end of "instance": memcpy(&instance.array, data, 25); While this didn't protect against overwriting adjacent fields in a given structure, it would at least stop overflows from reaching beyond the end of the structure into neighboring memory, and provided a meaningful mitigation of a subset of buffer overflow flaws. However, many desirable targets remain within the enclosing structure (for example function pointers). As it happened, there were very few cases of strcpy() family functions intentionally writing beyond the end of a string buffer. 
Once all known cases were removed from the kernel, the strcpy() family was tightened[1] to use mode 1, providing greater mitigation coverage. What remains is switching memcpy() to mode 1 as well, but making the switch is much more difficult because of how frustrating it can be to find existing "normal" uses of memcpy() that expect to write (or read) across multiple fields. The root cause of the problem is that the C language lacks a common pattern to indicate the intent of an author's use of memcpy(), and is further complicated by the available compile-time and run-time mitigation behaviors. The FORTIFY_SOURCE mitigation comes in two halves: the compile-time half, when both the buffer size _and_ the length of the copy is known, and the run-time half, when only the buffer size is known. If neither size is known, there is no bounds checking possible. At compile-time when the compiler sees that a length will always exceed a known buffer size, a warning can be deterministically emitted. For the run-time half, the length is tested against the known size of the buffer, and the overflowing operation is detected. (The performance overhead for these tests is virtually zero.) It is relatively easy to find compile-time false-positives since a warning is always generated. Fixing the false positives, however, can be very time-consuming as there are hundreds of instances. While it's possible some over-read conditions could lead to kernel memory exposures, the bulk of the risk comes from the run-time flaws where the length of a write may end up being attacker-controlled and lead to an overflow. Many of the compile-time false-positives take a form similar to this: memcpy(&instance.scalar2, data, sizeof(instance.scalar2) + sizeof(instance.scalar3)); and the run-time ones are similar, but lack a constant expression for the size of the copy: memcpy(instance.array, data, length); The former is meant to cover multiple fields (though its style has been frowned upon more recently), but has been technically legal. Both lack any expressivity in the C language about the author's _intent_ in a way that a compiler can check when the length isn't known at compile time. A comment doesn't work well because what's needed is something a compiler can directly reason about. Is a given memcpy() call expected to overflow into neighbors? Is it not? By using the new struct_group() macro, this intent can be much more easily encoded. It is not as easy to find the run-time false-positives since the code path to exercise a seemingly out-of-bounds condition that is actually expected may not be trivially reachable. Tightening the restrictions to block an operation for a false positive will either potentially create a greater flaw (if a copy is truncated by the mitigation), or destabilize the kernel (e.g. with a BUG()), making things completely useless for the end user. As a result, tightening the memcpy() restriction (when there is a reasonable level of uncertainty of the number of false positives), needs to first WARN() with no truncation. (Though any sufficiently paranoid end-user can always opt to set the panic_on_warn=1 sysctl.) Once enough development time has passed, the mitigation can be further intensified. (Note that this patch is only the compile-time checking step, which is a prerequisite to doing run-time checking, which will come in future patches.) 
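To make the struct_group() intent encoding concrete, here is a small stand-alone sketch; the struct_group() definition below is only a minimal approximation of the kernel helper so the example is self-contained, and the field names mirror the "struct object" example above:

    #include <string.h>

    /* Minimal approximation of the kernel's struct_group() helper. */
    #define struct_group(NAME, MEMBERS...)          \
            union {                                 \
                    struct { MEMBERS };             \
                    struct { MEMBERS } NAME;        \
            }

    struct object {
            unsigned short scalar1;
            struct_group(burst,             /* named window for cross-field copies */
                    char array[6];
                    unsigned long long scalar2;
            );
            unsigned int scalar3;
    };

    void fill(struct object *dst, const void *src)
    {
            /*
             * The intent is now explicit: this copy may span array and
             * scalar2, and FORTIFY can still bound it to sizeof(dst->burst)
             * instead of silently allowing writes up to the end of *dst.
             */
            memcpy(&dst->burst, src, sizeof(dst->burst));
    }
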
Given the potential frustrations of weeding out all the false positives when tightening the run-time checks, it is reasonable to wonder if these changes would actually add meaningful protection. Looking at just the last three years, there are 23 identified flaws with a CVE that mention "buffer overflow", and 11 are memcpy()-related buffer overflows. (For the remaining 12: 7 are array index overflows that would be mitigated by systems built with CONFIG_UBSAN_BOUNDS=y: CVE-2019-0145, CVE-2019-14835, CVE-2019-14896, CVE-2019-14897, CVE-2019-14901, CVE-2019-17666, CVE-2021-28952. 2 are miscalculated allocation sizes which could be mitigated with memory tagging: CVE-2019-16746, CVE-2019-2181. 1 is an iovec buffer bug maybe mitigated by memory tagging: CVE-2020-10742. 1 is a type confusion bug mitigated by stack canaries: CVE-2020-10942. 1 is a string handling logic bug with no mitigation I'm aware of: CVE-2021-28972.) At my last count on an x86_64 allmodconfig build, there are 35,294 calls to memcpy(). With callers instrumented to report all places where the buffer size is known but the length remains unknown (i.e. a run-time bounds check is added), we can count how many new run-time bounds checks are added when the destination and source arguments of memcpy() are changed to use "mode 1" bounds checking: 1,276. This means for the future run-time checking, there is a worst-case upper bounds of 3.6% false positives to fix. In addition, there were around 150 new compile-time warnings to evaluate and fix (which have now been fixed). With this instrumentation it's also possible to compare the places where the known 11 memcpy() flaw overflows manifested against the resulting list of potential new run-time bounds checks, as a measure of potential efficacy of the tightened mitigation. Much to my surprise, horror, and delight, all 11 flaws would have been detected by the newly added run-time bounds checks, making this a distinctly clear mitigation improvement: 100% coverage for known memcpy() flaws, with a possible 2 orders of magnitude gain in coverage over existing but undiscovered run-time dynamic length flaws (i.e. 1265 newly covered sites in addition to the 11 known), against only <4% of all memcpy() callers maybe gaining a false positive run-time check, with only about 150 new compile-time instances needing evaluation. 
Specifically these would have been mitigated: CVE-2020-24490 https://git.kernel.org/linus/a2ec905d1e160a33b2e210e45ad30445ef26ce0e CVE-2020-12654 https://git.kernel.org/linus/3a9b153c5591548612c3955c9600a98150c81875 CVE-2020-12653 https://git.kernel.org/linus/b70261a288ea4d2f4ac7cd04be08a9f0f2de4f4d CVE-2019-14895 https://git.kernel.org/linus/3d94a4a8373bf5f45cf5f939e88b8354dbf2311b CVE-2019-14816 https://git.kernel.org/linus/7caac62ed598a196d6ddf8d9c121e12e082cac3a CVE-2019-14815 https://git.kernel.org/linus/7caac62ed598a196d6ddf8d9c121e12e082cac3a CVE-2019-14814 https://git.kernel.org/linus/7caac62ed598a196d6ddf8d9c121e12e082cac3a CVE-2019-10126 https://git.kernel.org/linus/69ae4f6aac1578575126319d3f55550e7e440449 CVE-2019-9500 https://git.kernel.org/linus/1b5e2423164b3670e8bc9174e4762d297990deff no-CVE-yet https://git.kernel.org/linus/130f634da1af649205f4a3dd86cbe5c126b57914 no-CVE-yet https://git.kernel.org/linus/d10a87a3535cce2b890897914f5d0d83df669c63 To accelerate the review of potential run-time false positives, it's also worth noting that it is possible to partially automate checking by examining the memcpy() buffer argument to check for the destination struct member having a neighboring array member. It is reasonable to expect that the vast majority of run-time false positives would look like the already evaluated and fixed compile-time false positives, where the most common pattern is neighboring arrays. (And, FWIW, many of the compile-time fixes were actual bugs, so it is reasonable to assume we'll have similar cases of actual bugs getting fixed for run-time checks.) Implementation: Tighten the memcpy() destination buffer size checking to use the actual ("mode 1") target buffer size as the bounds check instead of their enclosing structure's ("mode 0") size. Use a common inline for memcpy() (and memmove() in a following patch), since all the tests are the same. All new cross-field memcpy() uses must use the struct_group() macro or similar to target a specific range of fields, so that FORTIFY_SOURCE can reason about the size and safety of the copy. For now, cross-member "mode 1" _read_ detection at compile-time will be limited to W=1 builds, since it is, unfortunately, very common. As the priority is solving write overflows, read overflows will be part of a future phase (and can be fixed in parallel, for anyone wanting to look at W=1 build output). For run-time, the "mode 0" size checking and mitigation is left unchanged, with "mode 1" to be added in stages. In this patch, no new run-time checks are added. Future patches will first bounds-check writes, and only perform a WARN() for now. This way any missed run-time false positives can be flushed out over the coming several development cycles, but system builders who have tested their workloads to be WARN()-free can enable the panic_on_warn=1 sysctl to immediately gain a mitigation against this class of buffer overflows. Once that is under way, run-time bounds-checking of reads can be similarly enabled. Related classes of flaws that will remain unmitigated: - memcpy() with flexible array structures, as the compiler does not currently have visibility into the size of the trailing flexible array. These can be fixed in the future by refactoring such cases to use a new set of flexible array structure helpers to perform the common serialization/deserialization code patterns doing allocation and/or copying. - memcpy() with raw pointers (e.g. 
void *, char *, etc), or otherwise having their buffer size unknown at compile time, have no good mitigation beyond memory tagging (and even that would only protect against inter-object overflow, not intra-object neighboring field overflows), or refactoring. Some kind of "fat pointer" solution is likely needed to gain proper size-of-buffer awareness. (e.g. see struct membuf) - type confusion where a higher level type's allocation size does not match the resulting cast type eventually passed to a deeper memcpy() call where the compiler cannot see the true type. In theory, greater static analysis could catch these, and the use of -Warray-bounds will help find some of these. [0] https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html [1] https://git.kernel.org/linus/6a39e62abbafd1d58d1722f40c7d26ef379c6a2f Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 109 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index a6cd6815f249..f578d00403ad 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -8,7 +8,9 @@ void fortify_panic(const char *name) __noreturn __cold; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); +void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)"); +void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("detected write beyond size of field (1st parameter); maybe use struct_group()?"); #define __compiletime_strlen(p) \ ({ \ @@ -209,22 +211,105 @@ __FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) return __underlying_memset(p, c, size); } -__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size) +/* + * To make sure the compiler can enforce protection against buffer overflows, + * memcpy(), memmove(), and memset() must not be used beyond individual + * struct members. If you need to copy across multiple members, please use + * struct_group() to create a named mirror of an anonymous struct union. + * (e.g. see struct sk_buff.) Read overflow checking is currently only + * done when a write overflow is also present, or when building with W=1. 
+ * + * Mitigation coverage matrix + * Bounds checking at: + * +-------+-------+-------+-------+ + * | Compile time | Run time | + * memcpy() argument sizes: | write | read | write | read | + * dest source length +-------+-------+-------+-------+ + * memcpy(known, known, constant) | y | y | n/a | n/a | + * memcpy(known, unknown, constant) | y | n | n/a | V | + * memcpy(known, known, dynamic) | n | n | B | B | + * memcpy(known, unknown, dynamic) | n | n | B | V | + * memcpy(unknown, known, constant) | n | y | V | n/a | + * memcpy(unknown, unknown, constant) | n | n | V | V | + * memcpy(unknown, known, dynamic) | n | n | V | B | + * memcpy(unknown, unknown, dynamic) | n | n | V | V | + * +-------+-------+-------+-------+ + * + * y = perform deterministic compile-time bounds checking + * n = cannot perform deterministic compile-time bounds checking + * n/a = no run-time bounds checking needed since compile-time deterministic + * B = can perform run-time bounds checking (currently unimplemented) + * V = vulnerable to run-time overflow (will need refactoring to solve) + * + */ +__FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, + const size_t p_size, + const size_t q_size, + const size_t p_size_field, + const size_t q_size_field, + const char *func) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); - if (__builtin_constant_p(size)) { - if (p_size < size) + /* + * Length argument is a constant expression, so we + * can perform compile-time bounds checking where + * buffer sizes are known. + */ + + /* Error when size is larger than enclosing struct. */ + if (p_size > p_size_field && p_size < size) __write_overflow(); - if (q_size < size) + if (q_size > q_size_field && q_size < size) __read_overflow2(); + + /* Warn when write size argument larger than dest field. */ + if (p_size_field < size) + __write_overflow_field(p_size_field, size); + /* + * Warn for source field over-read when building with W=1 + * or when an over-write happened, so both can be fixed at + * the same time. + */ + if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || p_size_field < size) && + q_size_field < size) + __read_overflow2_field(q_size_field, size); } - if (p_size < size || q_size < size) - fortify_panic(__func__); - return __underlying_memcpy(p, q, size); + /* + * At this point, length argument may not be a constant expression, + * so run-time bounds checking can be done where buffer sizes are + * known. (This is not an "else" because the above checks may only + * be compile-time warnings, and we want to still warn for run-time + * overflows.) + */ + + /* + * Always stop accesses beyond the struct that contains the + * field, when the buffer's remaining size is known. + * (The -1 test is to optimize away checks where the buffer + * lengths are unknown.) + */ + if ((p_size != (size_t)(-1) && p_size < size) || + (q_size != (size_t)(-1) && q_size < size)) + fortify_panic(func); } +#define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ + p_size_field, q_size_field, op) ({ \ + size_t __fortify_size = (size_t)(size); \ + fortify_memcpy_chk(__fortify_size, p_size, q_size, \ + p_size_field, q_size_field, #op); \ + __underlying_##op(p, q, __fortify_size); \ +}) + +/* + * __builtin_object_size() must be captured here to avoid evaluating argument + * side-effects further into the macro layers. 
+ */ +#define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ + __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ + __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + memcpy) + __FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -304,13 +389,14 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) return __real_kmemdup(p, size, gfp); } -/* defined after fortified strlen and memcpy to reuse them */ +/* Defined after fortified strlen to reuse it. */ __FORTIFY_INLINE char *strcpy(char *p, const char *q) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); size_t size; + /* If neither buffer size is known, immediately give up. */ if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strcpy(p, q); size = strlen(q) + 1; @@ -320,14 +406,13 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) /* Run-time check for dynamic size overflow. */ if (p_size < size) fortify_panic(__func__); - memcpy(p, q, size); + __underlying_memcpy(p, q, size); return p; } /* Don't use these outside the FORITFY_SOURCE implementation */ #undef __underlying_memchr #undef __underlying_memcmp -#undef __underlying_memcpy #undef __underlying_memmove #undef __underlying_memset #undef __underlying_strcat -- cgit v1.2.3 From 938a000e3f9bead24ea753286b3e4d2423275c9e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jun 2021 14:48:19 -0700 Subject: fortify: Detect struct member overflows in memmove() at compile-time As done for memcpy(), also update memmove() to use the same tightened compile-time checks under CONFIG_FORTIFY_SOURCE. Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index f578d00403ad..098d8a322a7a 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -309,22 +309,10 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ memcpy) - -__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size) -{ - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); - - if (__builtin_constant_p(size)) { - if (p_size < size) - __write_overflow(); - if (q_size < size) - __read_overflow2(); - } - if (p_size < size || q_size < size) - fortify_panic(__func__); - return __underlying_memmove(p, q, size); -} +#define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ + __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ + __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size) @@ -413,7 +401,6 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) /* Don't use these outside the FORITFY_SOURCE implementation */ #undef __underlying_memchr #undef __underlying_memcmp -#undef __underlying_memmove #undef __underlying_memset #undef __underlying_strcat #undef __underlying_strcpy -- cgit v1.2.3 From 28e77cc1c0686621a4d416f599cee5ab369daa0a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jun 2021 14:42:23 -0700 Subject: fortify: Detect struct member overflows in memset() at compile-time As done for memcpy(), 
also update memset() to use the same tightened compile-time bounds checking under CONFIG_FORTIFY_SOURCE. Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 54 +++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 098d8a322a7a..53123712bb3b 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -200,17 +200,56 @@ __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) return p; } -__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) +__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, + const size_t p_size, + const size_t p_size_field) { - size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size)) { + /* + * Length argument is a constant expression, so we + * can perform compile-time bounds checking where + * buffer sizes are known. + */ - if (__builtin_constant_p(size) && p_size < size) - __write_overflow(); - if (p_size < size) - fortify_panic(__func__); - return __underlying_memset(p, c, size); + /* Error when size is larger than enclosing struct. */ + if (p_size > p_size_field && p_size < size) + __write_overflow(); + + /* Warn when write size is larger than dest field. */ + if (p_size_field < size) + __write_overflow_field(p_size_field, size); + } + /* + * At this point, length argument may not be a constant expression, + * so run-time bounds checking can be done where buffer sizes are + * known. (This is not an "else" because the above checks may only + * be compile-time warnings, and we want to still warn for run-time + * overflows.) + */ + + /* + * Always stop accesses beyond the struct that contains the + * field, when the buffer's remaining size is known. + * (The -1 test is to optimize away checks where the buffer + * lengths are unknown.) + */ + if (p_size != (size_t)(-1) && p_size < size) + fortify_panic("memset"); } +#define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ + size_t __fortify_size = (size_t)(size); \ + fortify_memset_chk(__fortify_size, p_size, p_size_field), \ + __underlying_memset(p, c, __fortify_size); \ +}) + +/* + * __builtin_object_size() must be captured here to avoid evaluating argument + * side-effects further into the macro layers. 
+ */ +#define memset(p, c, s) __fortify_memset_chk(p, c, s, \ + __builtin_object_size(p, 0), __builtin_object_size(p, 1)) + /* * To make sure the compiler can enforce protection against buffer overflows, * memcpy(), memmove(), and memset() must not be used beyond individual @@ -401,7 +440,6 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) /* Don't use these outside the FORITFY_SOURCE implementation */ #undef __underlying_memchr #undef __underlying_memcmp -#undef __underlying_memset #undef __underlying_strcat #undef __underlying_strcpy #undef __underlying_strlen -- cgit v1.2.3 From f361143141362485b39eb40bb4910e3f4219180f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:43 -0800 Subject: fortify: Replace open-coded __gnu_inline attribute Replace open-coded gnu_inline attribute with the normal kernel convention for attributes: __gnu_inline Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-2-keescook@chromium.org --- include/linux/fortify-string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 53123712bb3b..439aad24ab3b 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,7 +2,7 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ -#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __FORTIFY_INLINE extern __always_inline __gnu_inline #define __RENAME(x) __asm__(#x) void fortify_panic(const char *name) __noreturn __cold; -- cgit v1.2.3 From f0202b8ca48cef152d4cdf775f39be6d3b372e1e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:44 -0800 Subject: Compiler Attributes: Add __pass_object_size for Clang In order to gain greater visibility to type information when using __builtin_object_size(), Clang has a function attribute "pass_object_size" that will make size information available for marked arguments in a function by way of implicit additional function arguments that are then wired up the __builtin_object_size(). This is needed to implement FORTIFY_SOURCE in Clang, as a workaround to Clang's __builtin_object_size() having limited visibility[1] into types across function calls (even inlines). This attribute has an additional benefit that it can be used even on non-inline functions to gain argument size information. [1] https://github.com/llvm/llvm-project/issues/53516 Cc: Nick Desaulniers Cc: Nathan Chancellor Cc: llvm@lists.linux.dev Reviewed-by: Miguel Ojeda Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-3-keescook@chromium.org --- include/linux/compiler_attributes.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 37e260020221..d0c503772061 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -263,6 +263,20 @@ */ #define __packed __attribute__((__packed__)) +/* + * Note: the "type" argument should match any __builtin_object_size(p, type) usage. + * + * Optional: not supported by gcc. + * Optional: not supported by icc. 
+ * + * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size + */ +#if __has_attribute(__pass_object_size__) +# define __pass_object_size(type) __attribute__((__pass_object_size__(type))) +#else +# define __pass_object_size(type) +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute */ -- cgit v1.2.3 From d694dbaefd6fa6bdde84b704779eab53942753a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:45 -0800 Subject: Compiler Attributes: Add __overloadable for Clang In order for FORTIFY_SOURCE to use __pass_object_size on an "extern inline" function, as all the fortified string functions are, the functions must be marked as being overloadable (i.e. different prototypes due to the implicitly injected object size arguments). This allows the __pass_object_size versions to take precedence. Cc: Nathan Chancellor Cc: llvm@lists.linux.dev Reviewed-by: Miguel Ojeda Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220208225350.1331628-4-keescook@chromium.org --- include/linux/compiler_attributes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index d0c503772061..dcaf55f5d1ae 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -257,6 +257,18 @@ */ #define __noreturn __attribute__((__noreturn__)) +/* + * Optional: not supported by gcc. + * Optional: not supported by icc. + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable + */ +#if __has_attribute(__overloadable__) +# define __overloadable __attribute__((__overloadable__)) +#else +# define __overloadable +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute * clang: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute -- cgit v1.2.3 From 1c7f4e5c1b6c9d9b508007c141dca77cab9434b4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:46 -0800 Subject: Compiler Attributes: Add __diagnose_as for Clang Clang will perform various compile-time diagnostics on uses of various functions (e.g. simple bounds-checking on strcpy(), etc). These diagnostics can be assigned to other functions (for example, new implementations of the string functions under CONFIG_FORTIFY_SOURCE) using the "diagnose_as_builtin" attribute. This allows those functions to retain their compile-time diagnostic warnings. Cc: Nathan Chancellor Cc: llvm@lists.linux.dev Reviewed-by: Miguel Ojeda Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220208225350.1331628-5-keescook@chromium.org --- include/linux/compiler_attributes.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index dcaf55f5d1ae..445e80517cab 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -100,6 +100,19 @@ # define __copy(symbol) #endif +/* + * Optional: not supported by gcc + * Optional: only supported since clang >= 14.0 + * Optional: not supported by icc + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin + */ +#if __has_attribute(__diagnose_as_builtin__) +# define __diagnose_as(builtin...) 
__attribute__((__diagnose_as_builtin__(builtin))) +#else +# define __diagnose_as(builtin...) +#endif + /* * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated' * attribute warnings entirely and for good") for more information. -- cgit v1.2.3 From 0a2b782a00f33e7d06dc43d099fa071ae97bee77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:47 -0800 Subject: fortify: Make pointer arguments const In preparation for using Clang's __pass_object_size attribute, make all the pointer arguments to the fortified string functions const. Nothing was changing their values anyway, so this added requirement (needed by __pass_object_size) requires no code changes and has no impact on the binary instruction output. Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-6-keescook@chromium.org --- include/linux/fortify-string.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 439aad24ab3b..f874ada4b9af 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -50,7 +50,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strncpy __builtin_strncpy #endif -__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) +__FORTIFY_INLINE char *strncpy(char * const p, const char *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 1); @@ -61,7 +61,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) return __underlying_strncpy(p, q, size); } -__FORTIFY_INLINE char *strcat(char *p, const char *q) +__FORTIFY_INLINE char *strcat(char * const p, const char *q) { size_t p_size = __builtin_object_size(p, 1); @@ -73,7 +73,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); -__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +__FORTIFY_INLINE __kernel_size_t strnlen(const char * const p, __kernel_size_t maxlen) { size_t p_size = __builtin_object_size(p, 1); size_t p_len = __compiletime_strlen(p); @@ -94,7 +94,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) } /* defined after fortified strnlen to reuse it. */ -__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +__FORTIFY_INLINE __kernel_size_t strlen(const char * const p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); @@ -110,7 +110,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) /* defined after fortified strlen to reuse it */ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); -__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) +__FORTIFY_INLINE size_t strlcpy(char * const p, const char * const q, size_t size) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); @@ -137,7 +137,7 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) /* defined after fortified strnlen to reuse it */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size) +__FORTIFY_INLINE ssize_t strscpy(char * const p, const char * const q, size_t size) { size_t len; /* Use string size rather than possible enclosing struct size. 
*/ @@ -183,7 +183,7 @@ __FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size) } /* defined after fortified strlen and strnlen to reuse them */ -__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) +__FORTIFY_INLINE char *strncat(char * const p, const char * const q, __kernel_size_t count) { size_t p_len, copy_len; size_t p_size = __builtin_object_size(p, 1); @@ -354,7 +354,7 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); -__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size) +__FORTIFY_INLINE void *memscan(void * const p, int c, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -365,7 +365,7 @@ __FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size) return __real_memscan(p, c, size); } -__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size) +__FORTIFY_INLINE int memcmp(const void * const p, const void * const q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); size_t q_size = __builtin_object_size(q, 0); @@ -381,7 +381,7 @@ __FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size) return __underlying_memcmp(p, q, size); } -__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size) +__FORTIFY_INLINE void *memchr(const void * const p, int c, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -393,7 +393,7 @@ __FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size) } void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); -__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size) +__FORTIFY_INLINE void *memchr_inv(const void * const p, int c, size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -405,7 +405,7 @@ __FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size) } extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); -__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) +__FORTIFY_INLINE void *kmemdup(const void * const p, size_t size, gfp_t gfp) { size_t p_size = __builtin_object_size(p, 0); @@ -417,7 +417,7 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) } /* Defined after fortified strlen to reuse it. */ -__FORTIFY_INLINE char *strcpy(char *p, const char *q) +__FORTIFY_INLINE char *strcpy(char * const p, const char * const q) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); -- cgit v1.2.3 From 92df138a8d663cefebc3124041253677a53c92cf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:48 -0800 Subject: fortify: Use __diagnose_as() for better diagnostic coverage In preparation for using Clang's __pass_object_size, add __diagnose_as() attributes to mark the functions as being the same as the indicated builtins. When __daignose_as() is available, Clang will have a more complete ability to apply its own diagnostic analysis to callers of these functions, as if they were the builtins themselves. Without __diagnose_as, Clang's compile time diagnostic messages won't be as precise as they could be, but at least users of older toolchains will still benefit from having fortified routines. 
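A minimal sketch of the effect described above (illustrative only; the buffer and the call site are invented): with __diagnose_as(__builtin_strcpy, 1, 2) applied to the fortified strcpy(), Clang can emit the same compile-time diagnostics for a bad call that it would emit for the builtin itself.

    #include <linux/string.h>

    void demo(void)
    {
    	char buf[4];

    	/* Clang can warn here as if __builtin_strcpy() had been called directly. */
    	strcpy(buf, "longer than buf");
    }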
Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-7-keescook@chromium.org --- include/linux/fortify-string.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index f874ada4b9af..db1ad1c1c79a 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -50,7 +50,8 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strncpy __builtin_strncpy #endif -__FORTIFY_INLINE char *strncpy(char * const p, const char *q, __kernel_size_t size) +__FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) +char *strncpy(char * const p, const char *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 1); @@ -61,7 +62,8 @@ __FORTIFY_INLINE char *strncpy(char * const p, const char *q, __kernel_size_t si return __underlying_strncpy(p, q, size); } -__FORTIFY_INLINE char *strcat(char * const p, const char *q) +__FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) +char *strcat(char * const p, const char *q) { size_t p_size = __builtin_object_size(p, 1); @@ -94,7 +96,8 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const p, __kernel_size_t m } /* defined after fortified strnlen to reuse it. */ -__FORTIFY_INLINE __kernel_size_t strlen(const char * const p) +__FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) +__kernel_size_t strlen(const char * const p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); @@ -183,7 +186,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const p, const char * const q, size_t si } /* defined after fortified strlen and strnlen to reuse them */ -__FORTIFY_INLINE char *strncat(char * const p, const char * const q, __kernel_size_t count) +__FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) +char *strncat(char * const p, const char * const q, __kernel_size_t count) { size_t p_len, copy_len; size_t p_size = __builtin_object_size(p, 1); @@ -365,7 +369,8 @@ __FORTIFY_INLINE void *memscan(void * const p, int c, __kernel_size_t size) return __real_memscan(p, c, size); } -__FORTIFY_INLINE int memcmp(const void * const p, const void * const q, __kernel_size_t size) +__FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) +int memcmp(const void * const p, const void * const q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); size_t q_size = __builtin_object_size(q, 0); @@ -381,7 +386,8 @@ __FORTIFY_INLINE int memcmp(const void * const p, const void * const q, __kernel return __underlying_memcmp(p, q, size); } -__FORTIFY_INLINE void *memchr(const void * const p, int c, __kernel_size_t size) +__FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) +void *memchr(const void * const p, int c, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -417,7 +423,8 @@ __FORTIFY_INLINE void *kmemdup(const void * const p, size_t size, gfp_t gfp) } /* Defined after fortified strlen to reuse it. 
*/ -__FORTIFY_INLINE char *strcpy(char * const p, const char * const q) +__FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) +char *strcpy(char * const p, const char * const q) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); -- cgit v1.2.3 From 67ebc3ab446230c77fe3b545a9d8a11cac1cfb6e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:49 -0800 Subject: fortify: Make sure strlen() may still be used as a constant expression In preparation for enabling Clang FORTIFY_SOURCE support, redefine strlen() as a macro that tests for being a constant expression so that strlen() can still be used in static initializers, which is lost when adding __pass_object_size and __overloadable. An example of this usage can be seen here: https://lore.kernel.org/all/202201252321.dRmWZ8wW-lkp@intel.com/ Notably, this constant expression feature of strlen() is not available for architectures that build with -ffreestanding. This means the kernel currently does not universally expect strlen() to be used this way, but since there _are_ some build configurations that depend on it, retain the characteristic for Clang FORTIFY_SOURCE builds too. Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-8-keescook@chromium.org --- include/linux/fortify-string.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index db1ad1c1c79a..f77cf22e2d60 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,8 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include + #define __FORTIFY_INLINE extern __always_inline __gnu_inline #define __RENAME(x) __asm__(#x) @@ -95,9 +97,16 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const p, __kernel_size_t m return ret; } -/* defined after fortified strnlen to reuse it. */ +/* + * Defined after fortified strnlen to reuse it. However, it must still be + * possible for strlen() to be used on compile-time strings for use in + * static initializers (i.e. as a constant expression). + */ +#define strlen(p) \ + __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ + __builtin_strlen(p), __fortify_strlen(p)) __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) -__kernel_size_t strlen(const char * const p) +__kernel_size_t __fortify_strlen(const char * const p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); -- cgit v1.2.3 From 281d0c962752fb40866dd8d4cade68656f34bd1f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Feb 2022 14:53:50 -0800 Subject: fortify: Add Clang support Enable FORTIFY_SOURCE support for Clang: Use the new __pass_object_size and __overloadable attributes so that Clang will have appropriate visibility into argument sizes such that __builtin_object_size(p, 1) will behave correctly. Additional details available here: https://github.com/llvm/llvm-project/issues/53516 https://github.com/ClangBuiltLinux/linux/issues/1401 A bug with __builtin_constant_p() of globally defined variables was fixed in Clang 13 (and backported to 12.0.1), so FORTIFY support must depend on that version or later. 
Additional details here: https://bugs.llvm.org/show_bug.cgi?id=41459 commit a52f8a59aef4 ("fortify: Explicitly disable Clang support") A bug with Clang's -mregparm=3 and -m32 makes some builtins unusable, so removing -ffreestanding (to gain the needed libcall optimizations with Clang) cannot be done. Without the libcall optimizations, Clang cannot provide appropriate FORTIFY coverage, so it must be disabled for CONFIG_X86_32. Additional details here; https://github.com/llvm/llvm-project/issues/53645 Cc: Miguel Ojeda Cc: Nick Desaulniers Cc: Nathan Chancellor Cc: George Burgess IV Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220208225350.1331628-9-keescook@chromium.org --- include/linux/fortify-string.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index f77cf22e2d60..295637a66c46 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -4,7 +4,7 @@ #include -#define __FORTIFY_INLINE extern __always_inline __gnu_inline +#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) void fortify_panic(const char *name) __noreturn __cold; @@ -52,8 +52,17 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strncpy __builtin_strncpy #endif +/* + * Clang's use of __builtin_object_size() within inlines needs hinting via + * __pass_object_size(). The preference is to only ever use type 1 (member + * size, rather than struct size), but there remain some stragglers using + * type 0 that will be converted in the future. + */ +#define POS __pass_object_size(1) +#define POS0 __pass_object_size(0) + __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) -char *strncpy(char * const p, const char *q, __kernel_size_t size) +char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 1); @@ -65,7 +74,7 @@ char *strncpy(char * const p, const char *q, __kernel_size_t size) } __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) -char *strcat(char * const p, const char *q) +char *strcat(char * const POS p, const char *q) { size_t p_size = __builtin_object_size(p, 1); @@ -77,7 +86,7 @@ char *strcat(char * const p, const char *q) } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); -__FORTIFY_INLINE __kernel_size_t strnlen(const char * const p, __kernel_size_t maxlen) +__FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { size_t p_size = __builtin_object_size(p, 1); size_t p_len = __compiletime_strlen(p); @@ -106,7 +115,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const p, __kernel_size_t m __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ __builtin_strlen(p), __fortify_strlen(p)) __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) -__kernel_size_t __fortify_strlen(const char * const p) +__kernel_size_t __fortify_strlen(const char * const POS p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); @@ -122,7 +131,7 @@ __kernel_size_t __fortify_strlen(const char * const p) /* defined after fortified strlen to reuse it */ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); -__FORTIFY_INLINE size_t strlcpy(char * const p, const char * const q, size_t size) +__FORTIFY_INLINE size_t 
strlcpy(char * const POS p, const char * const POS q, size_t size) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); @@ -149,7 +158,7 @@ __FORTIFY_INLINE size_t strlcpy(char * const p, const char * const q, size_t siz /* defined after fortified strnlen to reuse it */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -__FORTIFY_INLINE ssize_t strscpy(char * const p, const char * const q, size_t size) +__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) { size_t len; /* Use string size rather than possible enclosing struct size. */ @@ -196,7 +205,7 @@ __FORTIFY_INLINE ssize_t strscpy(char * const p, const char * const q, size_t si /* defined after fortified strlen and strnlen to reuse them */ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) -char *strncat(char * const p, const char * const q, __kernel_size_t count) +char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { size_t p_len, copy_len; size_t p_size = __builtin_object_size(p, 1); @@ -367,7 +376,7 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); -__FORTIFY_INLINE void *memscan(void * const p, int c, __kernel_size_t size) +__FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -379,7 +388,7 @@ __FORTIFY_INLINE void *memscan(void * const p, int c, __kernel_size_t size) } __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) -int memcmp(const void * const p, const void * const q, __kernel_size_t size) +int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); size_t q_size = __builtin_object_size(q, 0); @@ -396,7 +405,7 @@ int memcmp(const void * const p, const void * const q, __kernel_size_t size) } __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) -void *memchr(const void * const p, int c, __kernel_size_t size) +void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -408,7 +417,7 @@ void *memchr(const void * const p, int c, __kernel_size_t size) } void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); -__FORTIFY_INLINE void *memchr_inv(const void * const p, int c, size_t size) +__FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -420,7 +429,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const p, int c, size_t size) } extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); -__FORTIFY_INLINE void *kmemdup(const void * const p, size_t size, gfp_t gfp) +__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { size_t p_size = __builtin_object_size(p, 0); @@ -433,7 +442,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const p, size_t size, gfp_t gfp) /* Defined after fortified strlen to reuse it. 
*/ __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) -char *strcpy(char * const p, const char * const q) +char *strcpy(char * const POS p, const char * const POS q) { size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); @@ -462,4 +471,7 @@ char *strcpy(char * const p, const char * const q) #undef __underlying_strncat #undef __underlying_strncpy +#undef POS +#undef POS0 + #endif /* _LINUX_FORTIFY_STRING_H_ */ -- cgit v1.2.3 From ddbd89deb7d32b1fbb879f48d68fda1a8ac58e8e Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Fri, 11 Feb 2022 02:12:52 +0100 Subject: swiotlb: fix info leak with DMA_FROM_DEVICE

The problem I'm addressing was discovered by the LTP test covering cve-2018-1000204. A short description of what happens follows:

1) The test case issues a command code 00 (TEST UNIT READY) via the SG_IO interface with: dxfer_len == 524288, dxfer_dir == SG_DXFER_FROM_DEV and a corresponding dxferp. The peculiar thing about this is that TUR is not reading from the device.

2) In sg_start_req() the invocation of blk_rq_map_user() effectively bounces the user-space buffer. As if the device was to transfer into it. Since commit a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()") we make sure this first bounce buffer is allocated with GFP_ZERO.

3) For the rest of the story we keep ignoring that we have a TUR, so the device won't touch the buffer we prepare as if we had a DMA_FROM_DEVICE type of situation. My setup uses a virtio-scsi device and the buffer allocated by SG is mapped by the function virtqueue_add_split() which uses DMA_FROM_DEVICE for the "in" sgs (here scatter-gather and not scsi generics). This mapping involves bouncing via the swiotlb (we need swiotlb to do virtio in protected guests like s390 Secure Execution, or AMD SEV).

4) When the SCSI TUR is done, we first copy back the content of the second (that is swiotlb) bounce buffer (which most likely contains some previous IO data), to the first bounce buffer, which contains all zeros. Then we copy back the content of the first bounce buffer to the user-space buffer.

5) The test case detects that the buffer, which it zero-initialized, ain't all zeros and fails.

One can argue that this is an swiotlb problem, because without swiotlb we leak all zeros, and the swiotlb should be transparent in a sense that it does not affect the outcome (if all other participants are well behaved). Copying the content of the original buffer into the swiotlb buffer is the only way I can think of to make swiotlb transparent in such scenarios. So let's do just that if in doubt, but allow the driver to tell us that the whole mapped buffer is going to be overwritten, in which case we can preserve the old behavior and avoid the performance impact of the extra bounce.

Signed-off-by: Halil Pasic Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355bb1..6150d11a607e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,6 +61,14 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers.
+ */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a -- cgit v1.2.3 From cd149eff8d2201a63c074a6d9d03e52926aa535d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:04 +0300 Subject: mtd: spi-nor: intel-spi: Disable write protection only if asked Currently the driver tries to disable the BIOS write protection automatically even if this is not what the user wants. For this reason modify the driver so that by default it does not touch the write protection. Only if specifically asked by the user (setting writeable=1 command line parameter) the driver tries to disable the BIOS write protection. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Tudor Ambarus Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220209122706.42439-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- include/linux/platform_data/x86/intel-spi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h index 7f53a5c6f35e..7dda3f690465 100644 --- a/include/linux/platform_data/x86/intel-spi.h +++ b/include/linux/platform_data/x86/intel-spi.h @@ -19,11 +19,13 @@ enum intel_spi_type { /** * struct intel_spi_boardinfo - Board specific data for Intel SPI driver * @type: Type which this controller is compatible with - * @writeable: The chip is writeable + * @set_writeable: Try to make the chip writeable (optional) + * @data: Data to be passed to @set_writeable can be %NULL */ struct intel_spi_boardinfo { enum intel_spi_type type; - bool writeable; + bool (*set_writeable)(void __iomem *base, void *data); + void *data; }; #endif /* INTEL_SPI_PDATA_H */ -- cgit v1.2.3 From e23e5a05d1fd9479586c40ffbcc056b3e34ef816 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:05 +0300 Subject: mtd: spi-nor: intel-spi: Convert to SPI MEM The preferred way to implement SPI-NOR controller drivers is through SPI subsubsystem utilizing the SPI MEM core functions. This converts the Intel SPI flash controller driver over the SPI MEM by moving the driver from SPI-NOR subsystem to SPI subsystem and in one go make it use the SPI MEM functions. The driver name will be changed from intel-spi to spi-intel to match the convention used in the SPI subsystem. 
Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Boris Brezillon Acked-by: Lee Jones Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220209122706.42439-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- include/linux/mfd/lpc_ich.h | 2 +- include/linux/platform_data/x86/intel-spi.h | 31 ----------------------------- include/linux/platform_data/x86/spi-intel.h | 31 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 32 deletions(-) delete mode 100644 include/linux/platform_data/x86/intel-spi.h create mode 100644 include/linux/platform_data/x86/spi-intel.h (limited to 'include') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 39967a5eca6d..ea4a4b1b246a 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -8,7 +8,7 @@ #ifndef LPC_ICH_H #define LPC_ICH_H -#include +#include /* GPIO resources */ #define ICH_RES_GPIO 0 diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h deleted file mode 100644 index 7dda3f690465..000000000000 --- a/include/linux/platform_data/x86/intel-spi.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel PCH/PCU SPI flash driver. - * - * Copyright (C) 2016, Intel Corporation - * Author: Mika Westerberg - */ - -#ifndef INTEL_SPI_PDATA_H -#define INTEL_SPI_PDATA_H - -enum intel_spi_type { - INTEL_SPI_BYT = 1, - INTEL_SPI_LPT, - INTEL_SPI_BXT, - INTEL_SPI_CNL, -}; - -/** - * struct intel_spi_boardinfo - Board specific data for Intel SPI driver - * @type: Type which this controller is compatible with - * @set_writeable: Try to make the chip writeable (optional) - * @data: Data to be passed to @set_writeable can be %NULL - */ -struct intel_spi_boardinfo { - enum intel_spi_type type; - bool (*set_writeable)(void __iomem *base, void *data); - void *data; -}; - -#endif /* INTEL_SPI_PDATA_H */ diff --git a/include/linux/platform_data/x86/spi-intel.h b/include/linux/platform_data/x86/spi-intel.h new file mode 100644 index 000000000000..a512ec37abbb --- /dev/null +++ b/include/linux/platform_data/x86/spi-intel.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel PCH/PCU SPI flash driver. + * + * Copyright (C) 2016, Intel Corporation + * Author: Mika Westerberg + */ + +#ifndef SPI_INTEL_PDATA_H +#define SPI_INTEL_PDATA_H + +enum intel_spi_type { + INTEL_SPI_BYT = 1, + INTEL_SPI_LPT, + INTEL_SPI_BXT, + INTEL_SPI_CNL, +}; + +/** + * struct intel_spi_boardinfo - Board specific data for Intel SPI driver + * @type: Type which this controller is compatible with + * @set_writeable: Try to make the chip writeable (optional) + * @data: Data to be passed to @set_writeable can be %NULL + */ +struct intel_spi_boardinfo { + enum intel_spi_type type; + bool (*set_writeable)(void __iomem *base, void *data); + void *data; +}; + +#endif /* SPI_INTEL_PDATA_H */ -- cgit v1.2.3 From f48dc6b9664963107e500aecfc2f4df27dc5afb6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 00:19:54 +0100 Subject: spi: Retire legacy GPIO handling All drivers using GPIOs as chip select have been rewritten to use GPIO descriptors passing the ->use_gpio_descriptors flag. Retire the code and fields used by the legacy GPIO API. Do not drop the ->use_gpio_descriptors flag: it now only indicates that we want to use GPIOs in addition to native chip selects. 
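A minimal sketch of how a controller driver opts in after this change (the driver name and private struct are hypothetical): setting use_gpio_descriptors asks the core to fill in cs_gpiods and assign cs_gpiod on child devices for which a chip-select GPIO is described, while native chip selects keep working as before.

    #include <linux/module.h>
    #include <linux/platform_device.h>
    #include <linux/spi/spi.h>

    struct foo_spi_priv { int dummy; };	/* hypothetical driver state */

    static int foo_spi_probe(struct platform_device *pdev)
    {
    	struct spi_controller *ctlr;

    	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(struct foo_spi_priv));
    	if (!ctlr)
    		return -ENOMEM;

    	/* Use GPIO descriptors in addition to native chip selects. */
    	ctlr->use_gpio_descriptors = true;

    	return devm_spi_register_controller(&pdev->dev, ctlr);
    }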
Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220210231954.807904-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c3746ff35691..579d71cdf6fa 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -137,9 +137,6 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. - * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when - * not using a GPIO line) use cs_gpiod in new drivers by opting in on - * the spi_master. * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * @word_delay: delay to be inserted between consecutive @@ -186,7 +183,6 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - int cs_gpio; /* LEGACY: chip select gpio */ struct gpio_desc *cs_gpiod; /* chip select gpio desc */ struct spi_delay word_delay; /* inter-word delay */ /* CS delays */ @@ -418,17 +414,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller - * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per - * CS number. Any individual value may be -ENOENT for CS lines that - * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods - * in new drivers. * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab - * GPIO descriptors rather than using global GPIO numbers grabbed by the - * driver. This will fill in @cs_gpiods and @cs_gpios should not be used, - * and SPI devices will have the cs_gpiod assigned rather than cs_gpio. + * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have + * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. @@ -642,7 +633,6 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; /* gpio chip select */ - int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; -- cgit v1.2.3 From 1de785a58035031f81a43c42f355fa1db510dc36 Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 23 Jan 2022 13:01:05 -0600 Subject: mfd: iqs62x: Provide device revision to sub-devices Individual sub-devices may elect to make decisions based on the specific revision of silicon encountered at probe. This data is already read from the device, but is not retained. Pass this data on to the sub-devices by adding the software and hardware numbers (registers 0x01 and 0x02, respectively) to the iqs62x_core struct. 
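A minimal sketch of the intended use from a sub-device driver (the function and the gated feature are hypothetical): a cell can now key its behaviour off the hardware number retained in struct iqs62x_core and the new IQS620_HW_NUM_* defines.

    #include <linux/mfd/iqs62x.h>

    static int iqs620_setup(struct iqs62x_core *iqs62x)
    {
    	/* Hypothetical: only newer silicon revisions support the extra option. */
    	if (iqs62x->hw_num < IQS620_HW_NUM_V3)
    		return 0;

    	/* ... enable the revision-specific behaviour here ... */
    	return 0;
    }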
Signed-off-by: Jeff LaBundy Signed-off-by: Lee Jones --- include/linux/mfd/iqs62x.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/iqs62x.h b/include/linux/mfd/iqs62x.h index 5ced55eae11b..ffc86010af74 100644 --- a/include/linux/mfd/iqs62x.h +++ b/include/linux/mfd/iqs62x.h @@ -14,6 +14,11 @@ #define IQS624_PROD_NUM 0x43 #define IQS625_PROD_NUM 0x4E +#define IQS620_HW_NUM_V0 0x82 +#define IQS620_HW_NUM_V1 IQS620_HW_NUM_V0 +#define IQS620_HW_NUM_V2 IQS620_HW_NUM_V0 +#define IQS620_HW_NUM_V3 0x92 + #define IQS621_ALS_FLAGS 0x16 #define IQS622_ALS_FLAGS 0x14 @@ -129,6 +134,8 @@ struct iqs62x_core { struct completion fw_done; enum iqs62x_ui_sel ui_sel; unsigned long event_cache; + u8 sw_num; + u8 hw_num; }; extern const struct iqs62x_event_desc iqs62x_events[IQS62X_NUM_EVENTS]; -- cgit v1.2.3 From 65c53595bc2a14e77b455c8a12fc2631cbe43868 Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 13 Feb 2022 11:12:52 -0800 Subject: net: ocelot: align macros for consistency In the ocelot.h file, several read / write macros were split across multiple lines, while others weren't. Split all macros that exceed the 80 character column width and match the style of the rest of the file. Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 5c3a3597f1d2..1dbf8aff861c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -742,25 +742,39 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -#define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) -#define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi)) -#define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri)) -#define ocelot_read(ocelot, reg) __ocelot_read_ix(ocelot, reg, 0) - -#define ocelot_write_ix(ocelot, val, reg, gi, ri) __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) -#define ocelot_write_gix(ocelot, val, reg, gi) __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi)) -#define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri)) +#define ocelot_read_ix(ocelot, reg, gi, ri) \ + __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) +#define ocelot_read_gix(ocelot, reg, gi) \ + __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi)) +#define ocelot_read_rix(ocelot, reg, ri) \ + __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri)) +#define ocelot_read(ocelot, reg) \ + __ocelot_read_ix(ocelot, reg, 0) + +#define ocelot_write_ix(ocelot, val, reg, gi, ri) \ + __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) +#define ocelot_write_gix(ocelot, val, reg, gi) \ + __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi)) +#define ocelot_write_rix(ocelot, val, reg, ri) \ + __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri)) #define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0) -#define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) -#define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi)) -#define ocelot_rmw_rix(ocelot, val, m, reg, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_RSZ * (ri)) +#define 
ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) \ + __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) +#define ocelot_rmw_gix(ocelot, val, m, reg, gi) \ + __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi)) +#define ocelot_rmw_rix(ocelot, val, m, reg, ri) \ + __ocelot_rmw_ix(ocelot, val, m, reg, reg##_RSZ * (ri)) #define ocelot_rmw(ocelot, val, m, reg) __ocelot_rmw_ix(ocelot, val, m, reg, 0) -#define ocelot_field_write(ocelot, reg, val) regmap_field_write((ocelot)->regfields[(reg)], (val)) -#define ocelot_field_read(ocelot, reg, val) regmap_field_read((ocelot)->regfields[(reg)], (val)) -#define ocelot_fields_write(ocelot, id, reg, val) regmap_fields_write((ocelot)->regfields[(reg)], (id), (val)) -#define ocelot_fields_read(ocelot, id, reg, val) regmap_fields_read((ocelot)->regfields[(reg)], (id), (val)) +#define ocelot_field_write(ocelot, reg, val) \ + regmap_field_write((ocelot)->regfields[(reg)], (val)) +#define ocelot_field_read(ocelot, reg, val) \ + regmap_field_read((ocelot)->regfields[(reg)], (val)) +#define ocelot_fields_write(ocelot, id, reg, val) \ + regmap_fields_write((ocelot)->regfields[(reg)], (id), (val)) +#define ocelot_fields_read(ocelot, id, reg, val) \ + regmap_fields_read((ocelot)->regfields[(reg)], (id), (val)) #define ocelot_target_read_ix(ocelot, target, reg, gi, ri) \ __ocelot_target_read_ix(ocelot, target, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) -- cgit v1.2.3 From 40f3a5c81555880a437dfd3301826074dff18138 Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 13 Feb 2022 11:12:53 -0800 Subject: net: mscc: ocelot: add ability to perform bulk reads Regmap supports bulk register reads. Ocelot does not. This patch adds support for Ocelot to invoke bulk regmap reads. That will allow any driver that performs consecutive reads over memory regions to optimize that access. Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1dbf8aff861c..6687ba35eaf3 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -742,6 +742,9 @@ struct ocelot_policer { u32 burst; /* bytes */ }; +#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) \ + __ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count) + #define ocelot_read_ix(ocelot, reg, gi, ri) \ __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_read_gix(ocelot, reg, gi) \ @@ -798,6 +801,8 @@ struct ocelot_policer { u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg); +int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf, + int count); u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, -- cgit v1.2.3 From d87b1c08f38a2ce40cf559df36c107a2e6c16a8f Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 13 Feb 2022 11:12:54 -0800 Subject: net: mscc: ocelot: use bulk reads for stats Create and utilize bulk regmap reads instead of single access for gathering stats. The background reading of statistics happens frequently, and over a few contiguous memory regions. 
High speed PCIe buses and MMIO access will probably see negligible performance increase. Lower speed buses like SPI and I2C could see significant performance increase, since the bus configuration and register access times account for a large percentage of data transfer time. Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6687ba35eaf3..cacb103e4bad 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -540,6 +540,13 @@ struct ocelot_stat_layout { char name[ETH_GSTRING_LEN]; }; +struct ocelot_stats_region { + struct list_head node; + u32 offset; + int count; + u32 *buf; +}; + enum ocelot_tag_prefix { OCELOT_TAG_PREFIX_DISABLED = 0, OCELOT_TAG_PREFIX_NONE, @@ -671,6 +678,7 @@ struct ocelot { struct regmap_field *regfields[REGFIELD_MAX]; const u32 *const *map; const struct ocelot_stat_layout *stats_layout; + struct list_head stats_regions; unsigned int num_stats; u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; -- cgit v1.2.3 From 26394fc118d6115390bd5b3a0fb17096271da227 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Fri, 11 Feb 2022 17:30:42 +0000 Subject: ipv6: mcast: use rcu-safe version of ipv6_get_lladdr() Some time ago 8965779d2c0e ("ipv6,mcast: always hold idev->lock before mca_lock") switched ipv6_get_lladdr() to __ipv6_get_lladdr(), which is rcu-unsafe version. That was OK, because idev->lock was held for these codepaths. In 88e2ca308094 ("mld: convert ifmcaddr6 to RCU") these external locks were removed, so we probably need to restore the original rcu-safe call. Otherwise, we occasionally get a machine crashed/stalled with the following in dmesg: [ 3405.966610][T230589] general protection fault, probably for non-canonical address 0xdead00000000008c: 0000 [#1] SMP NOPTI [ 3405.982083][T230589] CPU: 44 PID: 230589 Comm: kworker/44:3 Tainted: G O 5.15.19-cloudflare-2022.2.1 #1 [ 3405.998061][T230589] Hardware name: SUPA-COOL-SERV [ 3406.009552][T230589] Workqueue: mld mld_ifc_work [ 3406.017224][T230589] RIP: 0010:__ipv6_get_lladdr+0x34/0x60 [ 3406.025780][T230589] Code: 57 10 48 83 c7 08 48 89 e5 48 39 d7 74 3e 48 8d 82 38 ff ff ff eb 13 48 8b 90 d0 00 00 00 48 8d 82 38 ff ff ff 48 39 d7 74 22 <66> 83 78 32 20 77 1b 75 e4 89 ca 23 50 2c 75 dd 48 8b 50 08 48 8b [ 3406.055748][T230589] RSP: 0018:ffff94e4b3fc3d10 EFLAGS: 00010202 [ 3406.065617][T230589] RAX: dead00000000005a RBX: ffff94e4b3fc3d30 RCX: 0000000000000040 [ 3406.077477][T230589] RDX: dead000000000122 RSI: ffff94e4b3fc3d30 RDI: ffff8c3a31431008 [ 3406.089389][T230589] RBP: ffff94e4b3fc3d10 R08: 0000000000000000 R09: 0000000000000000 [ 3406.101445][T230589] R10: ffff8c3a31430000 R11: 000000000000000b R12: ffff8c2c37887100 [ 3406.113553][T230589] R13: ffff8c3a39537000 R14: 00000000000005dc R15: ffff8c3a31431000 [ 3406.125730][T230589] FS: 0000000000000000(0000) GS:ffff8c3b9fc80000(0000) knlGS:0000000000000000 [ 3406.138992][T230589] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3406.149895][T230589] CR2: 00007f0dfea1db60 CR3: 000000387b5f2000 CR4: 0000000000350ee0 [ 3406.162421][T230589] Call Trace: [ 3406.170235][T230589] [ 3406.177736][T230589] mld_newpack+0xfe/0x1a0 [ 3406.186686][T230589] add_grhead+0x87/0xa0 [ 3406.195498][T230589] add_grec+0x485/0x4e0 [ 3406.204310][T230589] ? 
newidle_balance+0x126/0x3f0 [ 3406.214024][T230589] mld_ifc_work+0x15d/0x450 [ 3406.223279][T230589] process_one_work+0x1e6/0x380 [ 3406.232982][T230589] worker_thread+0x50/0x3a0 [ 3406.242371][T230589] ? rescuer_thread+0x360/0x360 [ 3406.252175][T230589] kthread+0x127/0x150 [ 3406.261197][T230589] ? set_kthread_struct+0x40/0x40 [ 3406.271287][T230589] ret_from_fork+0x22/0x30 [ 3406.280812][T230589] [ 3406.288937][T230589] Modules linked in: ... [last unloaded: kheaders] [ 3406.476714][T230589] ---[ end trace 3525a7655f2f3b9e ]--- Fixes: 88e2ca308094 ("mld: convert ifmcaddr6 to RCU") Reported-by: David Pinilla Caparros Signed-off-by: Ignat Korchagin Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index e7ce719838b5..59940e230b78 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -109,8 +109,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); -int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, -- cgit v1.2.3 From a2614140dc0f467a83aa3bb4b6ee2d6480a76202 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Feb 2022 19:45:06 +0200 Subject: net: dsa: mv88e6xxx: flush switchdev FDB workqueue before removing VLAN mv88e6xxx is special among DSA drivers in that it requires the VTU to contain the VID of the FDB entry it modifies in mv88e6xxx_port_db_load_purge(), otherwise it will return -EOPNOTSUPP. Sometimes due to races this is not always satisfied even if external code does everything right (first deletes the FDB entries, then the VLAN), because DSA commits to hardware FDB entries asynchronously since commit c9eb3e0f8701 ("net: dsa: Add support for learning FDB through notification"). Therefore, the mv88e6xxx driver must close this race condition by itself, by asking DSA to flush the switchdev workqueue of any FDB deletions in progress, prior to exiting a VLAN. Fixes: c9eb3e0f8701 ("net: dsa: Add support for learning FDB through notification") Reported-by: Rafael Richter Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 57b3e4e7413b..85a5ba3772f5 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1187,6 +1187,7 @@ void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); +void dsa_flush_workqueue(void); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); -- cgit v1.2.3 From 5891cd5ec46c2c2eb6427cb54d214b149635dd0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Feb 2022 12:06:23 -0800 Subject: net_sched: add __rcu annotation to netdev->qdisc syzbot found a data-race [1] which lead me to add __rcu annotations to netdev->qdisc, and proper accessors to get LOCKDEP support. 
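As a rough sketch of what the "proper accessors" look like at the call sites (the actual changes live in net/core and net/sched, outside this include-only excerpt), writers publish the pointer and readers fetch it through the RCU/lockdep-aware helpers; the syzbot report referenced as [1] follows below:

    /* writer path, e.g. when attaching the default qdiscs (RTNL held) */
    rcu_assign_pointer(dev->qdisc, qdisc);

    /* reader that holds RTNL */
    struct Qdisc *q = rtnl_dereference(dev->qdisc);

    /* lockless reader, inside an RCU read-side critical section */
    struct Qdisc *q2 = rcu_dereference(dev->qdisc);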
[1] BUG: KCSAN: data-race in dev_activate / qdisc_lookup_rcu write to 0xffff888168ad6410 of 8 bytes by task 13559 on cpu 1: attach_default_qdiscs net/sched/sch_generic.c:1167 [inline] dev_activate+0x2ed/0x8f0 net/sched/sch_generic.c:1221 __dev_open+0x2e9/0x3a0 net/core/dev.c:1416 __dev_change_flags+0x167/0x3f0 net/core/dev.c:8139 rtnl_configure_link+0xc2/0x150 net/core/rtnetlink.c:3150 __rtnl_newlink net/core/rtnetlink.c:3489 [inline] rtnl_newlink+0xf4d/0x13e0 net/core/rtnetlink.c:3529 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5594 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2496 __do_sys_sendmsg net/socket.c:2505 [inline] __se_sys_sendmsg net/socket.c:2503 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888168ad6410 of 8 bytes by task 13560 on cpu 0: qdisc_lookup_rcu+0x30/0x2e0 net/sched/sch_api.c:323 __tcf_qdisc_find+0x74/0x3a0 net/sched/cls_api.c:1050 tc_del_tfilter+0x1c7/0x1350 net/sched/cls_api.c:2211 rtnetlink_rcv_msg+0x5ba/0x7e0 net/core/rtnetlink.c:5585 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2496 __do_sys_sendmsg net/socket.c:2505 [inline] __se_sys_sendmsg net/socket.c:2503 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0xffffffff85dee080 -> 0xffff88815d96ec00 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 13560 Comm: syz-executor.2 Not tainted 5.17.0-rc3-syzkaller-00116-gf1baf68e1383-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 470502de5bdb ("net: sched: unlock rules update API") Signed-off-by: Eric Dumazet Cc: Vlad Buslov Reported-by: syzbot Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e490b84732d1..8b5a314db167 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2158,7 +2158,7 @@ struct net_device { struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; unsigned int real_num_tx_queues; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; -- cgit v1.2.3 From baebdf48c360080710f80699eea3affbb13d6c65 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 12 Feb 2022 00:38:38 +0100 Subject: net: dev: Makes sure netif_rx() can be invoked in any context. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dave suggested a while ago (eleven years by now) "Let's make netif_rx() work in all contexts and get rid of netif_rx_ni()". Eric agreed and pointed out that modern devices should use netif_receive_skb() to avoid the overhead. In the meantime someone added another variant, netif_rx_any_context(), which behaves as suggested. netif_rx() must be invoked with disabled bottom halves to ensure that pending softirqs, which were raised within the function, are handled. netif_rx_ni() can be invoked only from process context (bottom halves must be enabled) because the function handles pending softirqs without checking if bottom halves were disabled or not. netif_rx_any_context() invokes on the former functions by checking in_interrupts(). netif_rx() could be taught to handle both cases (disabled and enabled bottom halves) by simply disabling bottom halves while invoking netif_rx_internal(). The local_bh_enable() invocation will then invoke pending softirqs only if the BH-disable counter drops to zero. Eric is concerned about the overhead of BH-disable+enable especially in regard to the loopback driver. As critical as this driver is, it will receive a shortcut to avoid the additional overhead which is not needed. Add a local_bh_disable() section in netif_rx() to ensure softirqs are handled if needed. Provide __netif_rx() which does not disable BH and has a lockdep assert to ensure that interrupts are disabled. Use this shortcut in the loopback driver and in drivers/net/*.c. Make netif_rx_ni() and netif_rx_any_context() invoke netif_rx() so they can be removed once they are no more users left. Link: https://lkml.kernel.org/r/20100415.020246.218622820.davem@davemloft.net Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 14 ++++++++++++-- include/trace/events/net.h | 14 -------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5f6e2c0b0c90..e99db8341da5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3672,8 +3672,18 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); -int netif_rx_ni(struct sk_buff *skb); -int netif_rx_any_context(struct sk_buff *skb); +int __netif_rx(struct sk_buff *skb); + +static inline int netif_rx_ni(struct sk_buff *skb) +{ + return netif_rx(skb); +} + +static inline int netif_rx_any_context(struct sk_buff *skb) +{ + return netif_rx(skb); +} + int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 78c448c6ab4c..032b431b987b 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -260,13 +260,6 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry, TP_ARGS(skb) ); -DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry, - - TP_PROTO(const struct sk_buff *skb), - - TP_ARGS(skb) -); - DECLARE_EVENT_CLASS(net_dev_rx_exit_template, TP_PROTO(int ret), @@ -312,13 +305,6 @@ DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_exit, TP_ARGS(ret) ); -DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_ni_exit, - - TP_PROTO(int ret), - - TP_ARGS(ret) -); - DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_list_exit, TP_PROTO(int ret), -- cgit v1.2.3 From 76f05d88623e559eb9fc41db9fb911e67fab0e7a Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Mon, 14 Feb 2022 12:46:52 +0530 Subject: net: wwan: debugfs obtained dev reference not dropped WWAN driver call's wwan_get_debugfs_dir() to obtain WWAN debugfs dir entry. As part of this procedure it returns a reference to a found device. Since there is no debugfs interface available at WWAN subsystem, it is not possible to drop dev reference post debugfs use. This leads to side effects like post wwan driver load and reload the wwan instance gets increment from wwanX to wwanX+1. A new debugfs interface is added in wwan subsystem so that wwan driver can drop the obtained dev reference post debugfs use. void wwan_put_debugfs_dir(struct dentry *dir) Signed-off-by: M Chetan Kumar Signed-off-by: David S. 
Miller --- include/linux/wwan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index afb3334ec8c5..5ce2acf444fb 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -174,11 +174,13 @@ void wwan_unregister_ops(struct device *parent); #ifdef CONFIG_WWAN_DEBUGFS struct dentry *wwan_get_debugfs_dir(struct device *parent); +void wwan_put_debugfs_dir(struct dentry *dir); #else static inline struct dentry *wwan_get_debugfs_dir(struct device *parent) { return ERR_PTR(-ENODEV); } +static inline void wwan_put_debugfs_dir(struct dentry *dir) {} #endif #endif /* __WWAN_H */ -- cgit v1.2.3 From 32e92d9f6f876721cc4f565f8369d542b6266380 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 3 Feb 2022 17:59:20 +0800 Subject: iommu/iova: Separate out rcache init Currently the rcache structures are allocated for all IOVA domains, even if they do not use "fast" alloc+free interface. This is wasteful of memory. In addition, fails in init_iova_rcaches() are not handled safely, which is less than ideal. Make "fast" users call a separate rcache init explicitly, which includes error checking. Signed-off-by: John Garry Reviewed-by: Robin Murphy Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1643882360-241739-1-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index cea79cb9f26c..320a70e40233 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -21,18 +21,8 @@ struct iova { unsigned long pfn_lo; /* Lowest allocated pfn */ }; -struct iova_magazine; -struct iova_cpu_rcache; -#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ - -struct iova_rcache { - spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; - struct iova_cpu_rcache __percpu *cpu_rcaches; -}; +struct iova_rcache; /* holds all the iova translations for a domain */ struct iova_domain { @@ -46,7 +36,7 @@ struct iova_domain { unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + struct iova_rcache *rcaches; struct hlist_node cpuhp_dead; }; @@ -102,6 +92,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +int iova_domain_init_rcaches(struct iova_domain *iovad); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else -- cgit v1.2.3 From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:10 +0100 Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL User space needs a mechanism to perform key checked accesses when emulating instructions. The key can be passed as an additional argument. Having an additional argument is flexible, as user space can pass the guest PSW's key, in order to make an access the same way the CPU would, or pass another key if necessary. 
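A minimal user-space sketch of a key-checked read using the new field (illustrative only: the gaddr/size/flags members and KVM_S390_MEMOP_LOGICAL_READ belong to the existing uapi layout, which this include-only excerpt does not show in full):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Read 256 bytes from a guest logical address, checked against access key 9. */
    static int read_guest_with_key(int vcpu_fd, void *buf, __u64 gaddr)
    {
            struct kvm_s390_mem_op op;

            memset(&op, 0, sizeof(op));
            op.gaddr = gaddr;
            op.size  = 256;
            op.buf   = (__u64)(unsigned long)buf;
            op.op    = KVM_S390_MEMOP_LOGICAL_READ;
            op.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION;
            op.ar    = 0;   /* access register 0 */
            op.key   = 9;   /* e.g. the guest PSW key */

            return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
    }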
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..4566f429db2c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -562,7 +562,10 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* should be set to 0 */ }; @@ -575,6 +578,7 @@ struct kvm_s390_mem_op { /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) /* for KVM_INTERRUPT */ struct kvm_interrupt { -- cgit v1.2.3 From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:11 +0100 Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access Channel I/O honors storage keys and is performed on absolute memory. For I/O emulation user space therefore needs to be able to do key checked accesses. The vm IOCTL supports read/write accesses, as well as checking if an access would succeed. Unlike relying on KVM_S390_GET_SKEYS for key checking would, the vm IOCTL performs the check in lockstep with the read or write, by, ultimately, mapping the access to move instructions that support key protection checking with a supplied key. Fetch and storage protection override are not applicable to absolute accesses and so are not applied as they are when using the vcpu memop. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4566f429db2c..4bc7623def87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -575,6 +575,8 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -- cgit v1.2.3 From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:13 +0100 Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: * The vcpu MEM_OP IOCTL supports storage key checking. * The vm MEM_OP IOCTL exists. 
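A short user-space sketch of probing for the capability before relying on the new operations (standard KVM_CHECK_EXTENSION usage; vm_fd is assumed to be an open VM file descriptor):

    if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP_EXTENSION) > 0) {
            /* the vm KVM_S390_MEM_OP ioctl exists and the vcpu memop
             * supports storage key checking */
    }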
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4bc7623def87..08756eeea065 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1140,6 +1140,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_S390_MEM_OP_EXTENSION 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:14 +0100 Subject: KVM: s390: Update api documentation for memop ioctl Document all currently existing operations, flags and explain under which circumstances they are available. Document the recently introduced absolute operations and the storage key protection flag, as well as the existing SIDA operations. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 08756eeea065..dbc550bbd9fa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -567,7 +567,7 @@ struct kvm_s390_mem_op { __u8 key; /* access key, ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ -- cgit v1.2.3 From b2638e56c2ced2ca258d22f939c47327b189e00c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Feb 2022 14:56:13 +0200 Subject: device property: Don't split fwnode_get_irq*() APIs in the code New fwnode_get_irq_byname() landed after an unrelated function by ordering. Move fwnode_iomap(), so fwnode_get_irq*() APIs will go together. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Sakari Ailus Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 95d56a562b6a..4cd4b326941f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -123,8 +123,6 @@ void fwnode_handle_put(struct fwnode_handle *fwnode); int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); -void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index); - unsigned int device_get_child_node_count(struct device *dev); static inline bool device_property_read_bool(struct device *dev, @@ -388,8 +386,10 @@ enum dev_dma_attr device_get_dma_attr(struct device *dev); const void *device_get_match_data(struct device *dev); int device_get_phy_mode(struct device *dev); - int fwnode_get_phy_mode(struct fwnode_handle *fwnode); + +void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index); + struct fwnode_handle *fwnode_graph_get_next_endpoint( const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * -- cgit v1.2.3 From 7a853c2d5951419fdf3c1c9d2b6f5a38f6a6857d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:45 -0800 Subject: mm: Change CONFIG option for mm->pasid field This currently depends on CONFIG_IOMMU_SUPPORT. But it is only needed when CONFIG_IOMMU_SVA option is enabled. Change the CONFIG guards around definition and initialization of mm->pasid field. Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5140e5feb486..c5cbfd7915ad 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -631,7 +631,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif } __randomize_layout; -- cgit v1.2.3 From 150154aae4311e7e6458903baecdc8fffe981ed3 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 1 Dec 2021 10:20:53 +0100 Subject: rcu: Fix description of kvfree_rcu() The kvfree_rcu() header comment's description of the "ptr" parameter is unclear, therefore rephrase it to make it clear that it is a pointer to the memory to eventually be passed to kvfree(). Reported-by: Steven Rostedt Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 88b42eb46406..9d7df8d36af0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * kvfree_rcu(ptr); * - * where @ptr is a pointer to kvfree(). + * where @ptr is the pointer to be freed by kvfree(). 
* * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() -- cgit v1.2.3 From 58d4292bd037b01fbb940a5170817f7d40caa9d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Jan 2022 16:07:28 -0800 Subject: rcu: Uninline multi-use function: finish_rcuwait() This is a rarely used function, so uninlining its 3 instructions is probably a win or a wash - but the main motivation is to make independent of task_struct details. Signed-off-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcuwait.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 61c56cca95c4..8052d34da782 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w) rcu_assign_pointer(w->task, current); } -static inline void finish_rcuwait(struct rcuwait *w) -{ - rcu_assign_pointer(w->task, NULL); - __set_current_state(TASK_RUNNING); -} +extern void finish_rcuwait(struct rcuwait *w); #define rcuwait_wait_event(w, condition, state) \ ({ \ -- cgit v1.2.3 From e6339d3b443c436c3b8f45eefec2212a8c07065d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Jan 2022 16:16:55 -0800 Subject: rcu: Remove __read_mostly annotations from rcu_scheduler_active externs Remove the __read_mostly attributes from the rcu_scheduler_active extern declarations, because these attributes are ignored for prototypes and we'd have to include the full header to gain this functionally pointless attribute defined. Signed-off-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- include/linux/rcutree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9d7df8d36af0..e7c39c200e2b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d669400..76665db179fa 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { } void exit_rcu(void); void rcu_scheduler_starting(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -- cgit v1.2.3 From 7a5fbc9bcba5325a45297a4ba00091f39a63a1ed Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:46 -0800 Subject: iommu/ioasid: Introduce a helper to check for valid PASIDs Define a pasid_valid() helper to check if a given PASID is valid. [ bp: Massage commit message. 
] Suggested-by: Ashok Raj Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com --- include/linux/ioasid.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..2237f64dbaae 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ -- cgit v1.2.3 From a6cbd44093ef305b02ad5f80ed54abf0148a696c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:47 -0800 Subject: kernel/fork: Initialize mm's PASID A new mm doesn't have a PASID yet when it's created. Initialize the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1). INIT_PASID (0) is reserved for kernel legacy DMA PASID. It cannot be allocated to a user process. Initializing the process's PASID to 0 may cause confusion that's why the process uses the reserved kernel legacy DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly tells the process doesn't have a valid PASID yet. Even though the only user of mm_pasid_init() is in fork.c, define it in as the first of three mm/pasid life cycle functions (init/set/drop) to keep these all together. Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com --- include/linux/sched/mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aa5f09ca5bcf..c74d1edbac2f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -433,4 +434,13 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 8cb37a5974a48569aab8a1736d21399fddbdbdb2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 10:05:20 +0100 Subject: stack: Introduce CONFIG_RANDOMIZE_KSTACK_OFFSET The randomize_kstack_offset feature is unconditionally compiled in when the architecture supports it. To add constraints on compiler versions, we require a dedicated Kconfig variable. Therefore, introduce RANDOMIZE_KSTACK_OFFSET. 
Furthermore, this option is now also configurable by EXPERT kernels: while the feature is supposed to have zero performance overhead when disabled, due to its use of static branches, there are few cases where giving a distribution the option to disable the feature entirely makes sense. For example, in very resource constrained environments, which would never enable the feature to begin with, in which case the additional kernel code size increase would be redundant. Signed-off-by: Marco Elver Reviewed-by: Nathan Chancellor Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220131090521.1947110-1-elver@google.com --- include/linux/randomize_kstack.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index bebc911161b6..91f1b990a3c3 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -2,6 +2,7 @@ #ifndef _LINUX_RANDOMIZE_KSTACK_H #define _LINUX_RANDOMIZE_KSTACK_H +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET #include #include #include @@ -50,5 +51,9 @@ void *__builtin_alloca(size_t size); raw_cpu_write(kstack_offset, offset); \ } \ } while (0) +#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ +#define add_random_kstack_offset() do { } while (0) +#define choose_random_kstack_offset(rand) do { } while (0) +#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #endif -- cgit v1.2.3 From efa90c11f62e6b7252fb75efe2787056872a627c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 10:05:21 +0100 Subject: stack: Constrain and fix stack offset randomization with Clang builds All supported versions of Clang perform auto-init of __builtin_alloca() when stack auto-init is on (CONFIG_INIT_STACK_ALL_{ZERO,PATTERN}). add_random_kstack_offset() uses __builtin_alloca() to add a stack offset. This means, when CONFIG_INIT_STACK_ALL_{ZERO,PATTERN} is enabled, add_random_kstack_offset() will auto-init that unused portion of the stack used to add an offset. There are several problems with this: 1. These offsets can be as large as 1023 bytes. Performing memset() on them isn't exactly cheap, and this is done on every syscall entry. 2. Architectures adding add_random_kstack_offset() to syscall entry implemented in C require them to be 'noinstr' (e.g. see x86 and s390). The potential problem here is that a call to memset may occur, which is not noinstr. A x86_64 defconfig kernel with Clang 11 and CONFIG_VMLINUX_VALIDATION shows: | vmlinux.o: warning: objtool: do_syscall_64()+0x9d: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: do_int80_syscall_32()+0xab: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: __do_fast_syscall_32()+0xe2: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: fixup_bad_iret()+0x2f: call to memset() leaves .noinstr.text section Clang 14 (unreleased) will introduce a way to skip alloca initialization via __builtin_alloca_uninitialized() (https://reviews.llvm.org/D115440). Constrain RANDOMIZE_KSTACK_OFFSET to only be enabled if no stack auto-init is enabled, the compiler is GCC, or Clang is version 14+. Use __builtin_alloca_uninitialized() if the compiler provides it, as is done by Clang 14. 
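For context, a much-simplified sketch of the per-syscall-entry usage pattern referred to above (the real users are the x86 and s390 entry code, which are outside this include-only excerpt; the entropy expression is a placeholder):

    noinstr void arch_syscall_entry(struct pt_regs *regs)
    {
            add_random_kstack_offset();     /* consume the offset chosen last time */

            /* ... dispatch the system call ... */

            /* stash fresh entropy for the next entry; architectures typically
             * feed a cheap cycle-counter value here (placeholder expression) */
            choose_random_kstack_offset(arch_cheap_entropy());
    }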
Link: https://lkml.kernel.org/r/YbHTKUjEejZCLyhX@elver.google.com Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Signed-off-by: Marco Elver Reviewed-by: Nathan Chancellor Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220131090521.1947110-2-elver@google.com --- include/linux/randomize_kstack.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 91f1b990a3c3..1468caf001c0 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -17,8 +17,20 @@ DECLARE_PER_CPU(u32, kstack_offset); * alignment. Also, since this use is being explicitly masked to a max of * 10 bits, stack-clash style attacks are unlikely. For more details see * "VLAs" in Documentation/process/deprecated.rst + * + * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently + * only with Clang and not GCC). Initializing the unused area on each syscall + * entry is expensive, and generating an implicit call to memset() may also be + * problematic (such as in noinstr functions). Therefore, if the compiler + * supports it (which it should if it initializes allocas), always use the + * "uninitialized" variant of the builtin. */ -void *__builtin_alloca(size_t size); +#if __has_builtin(__builtin_alloca_uninitialized) +#define __kstack_alloca __builtin_alloca_uninitialized +#else +#define __kstack_alloca __builtin_alloca +#endif + /* * Use, at most, 10 bits of entropy. We explicitly cap this to keep the * "VLA" from being unbounded (see above). 10 bits leaves enough room for @@ -37,7 +49,7 @@ void *__builtin_alloca(size_t size); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ -- cgit v1.2.3 From 481153991c410e322383490527c20b0c41dbf652 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 10 Feb 2022 22:33:41 +0100 Subject: i2c: don't expose function which is only used internally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i2c_setup_smbus_alert() is only needed within the I2C core, so no need to expose it to other modules. 
Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 95cf902e0bda..ced1c6ead52a 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -30,14 +30,6 @@ struct i2c_client *i2c_new_smbus_alert_device(struct i2c_adapter *adapter, struct i2c_smbus_alert_setup *setup); int i2c_handle_smbus_alert(struct i2c_client *ara); -#if IS_ENABLED(CONFIG_I2C_SMBUS) -int i2c_setup_smbus_alert(struct i2c_adapter *adap); -#else -static inline int i2c_setup_smbus_alert(struct i2c_adapter *adap) -{ - return 0; -} -#endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_I2C_SLAVE) struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter); void i2c_free_slave_host_notify_device(struct i2c_client *client); -- cgit v1.2.3 From 701fac40384f07197b106136012804c3cae0b3de Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:48 -0800 Subject: iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit PASIDs are process-wide. It was attempted to use refcounted PASIDs to free them when the last thread drops the refcount. This turned out to be complex and error prone. Given the fact that the PASID space is 20 bits, which allows up to 1M processes to have a PASID associated concurrently, PASID resource exhaustion is not a realistic concern. Therefore, it was decided to simplify the approach and stick with lazy on demand PASID allocation, but drop the eager free approach and make an allocated PASID's lifetime bound to the lifetime of the process. Get rid of the refcounting mechanisms and replace/rename the interfaces to reflect this new approach. [ bp: Massage commit message. 
] Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com --- include/linux/ioasid.h | 12 ++---------- include/linux/sched/mm.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 2237f64dbaae..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,8 +34,7 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -53,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index c74d1edbac2f..a80356e9dc69 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -439,8 +439,24 @@ static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = INVALID_IOASID; } + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} #else static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From a3d29e8291b622780eb6e4e3eeaf2b24ec78fd43 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2022 15:02:50 -0800 Subject: sched: Define and initialize a flag to identify valid PASID in the task Add a new single bit field to the task structure to track whether this task has initialized the IA32_PASID MSR to the mm's PASID. Initialize the field to zero when creating a new task with fork/clone. Signed-off-by: Peter Zijlstra Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..4e5de3aed410 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -938,6 +938,9 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd_signal:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ -- cgit v1.2.3 From 45ec846c1cd11835a29c85645065115dd791aa45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:25:58 +0000 Subject: irqdomain: Let irq_domain_set_{info,hwirq_and_chip} take a const irq_chip In order to let a const irqchip be fed to the irqchip layer, adjust the various prototypes. An extra cast in irq_domain_set_hwirq_and_chip() is required to avoid a warning. Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-2-maz@kernel.org --- include/linux/irqdomain.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index be25a33293e5..00d577f90883 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -479,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); @@ -522,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, - struct irq_chip *chip, + const struct irq_chip *chip, void *chip_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, -- cgit v1.2.3 From 393e1280f765661cf39785e967676a4e57324126 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:25:59 +0000 Subject: genirq: Allow irq_chip registration functions to take a const irq_chip In order to let a const irqchip be fed to the irqchip layer, adjust the various prototypes. An extra cast in irq_set_chip()() is required to avoid a warning. 
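With the constified prototypes, an irqchip driver can keep its chip in rodata; a minimal sketch (driver, callback and chip names are illustrative, not taken from the patch):

    #include <linux/irq.h>
    #include <linux/irqdomain.h>

    static void my_irq_mask(struct irq_data *d)   { /* ... */ }
    static void my_irq_unmask(struct irq_data *d) { /* ... */ }

    static const struct irq_chip my_irq_chip = {
            .name       = "my-irqchip",
            .irq_mask   = my_irq_mask,
            .irq_unmask = my_irq_unmask,
    };

    static int my_domain_map(struct irq_domain *d, unsigned int virq,
                             irq_hw_number_t hwirq)
    {
            /* const chip is now accepted here */
            irq_set_chip_and_handler(virq, &my_irq_chip, handle_level_irq);
            return 0;
    }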
Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-3-maz@kernel.org --- include/linux/irq.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2cb2e2ac2703..f92788ccdba2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -710,10 +710,11 @@ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name); -static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, +static inline void irq_set_chip_and_handler(unsigned int irq, + const struct irq_chip *chip, irq_flow_handler_t handle) { irq_set_chip_and_handler_name(irq, chip, handle, NULL); @@ -803,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) } /* Set/get chip/data for an IRQ: */ -extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); +extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip); extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); -- cgit v1.2.3 From 3fb212a042fbd8eccbb2af1852e03ed7757b9600 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:04 +0000 Subject: irqchip/versatile-fpga: Switch to dynamic chip name output Move the name output to the relevant callback, which allows us some nice cleanups (mostly owing to the fact that the driver is now DT only. We also drop a random include directive from the ftintc010 driver. Signed-off-by: Marc Zyngier Reviewed-by: Linus Walleij Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-8-maz@kernel.org --- include/linux/irqchip/versatile-fpga.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/irqchip/versatile-fpga.h (limited to 'include') diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h deleted file mode 100644 index a978fc8c7996..000000000000 --- a/include/linux/irqchip/versatile-fpga.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLAT_FPGA_IRQ_H -#define PLAT_FPGA_IRQ_H - -struct device_node; -struct pt_regs; - -void fpga_handle_irq(struct pt_regs *regs); -void fpga_irq_init(void __iomem *, const char *, int, int, u32, - struct device_node *node); -int fpga_irq_of_init(struct device_node *node, - struct device_node *parent); - -#endif -- cgit v1.2.3 From 9ceaf6f76b203682bb6100e14b3d7da4c0bedde8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Feb 2022 11:15:53 -0800 Subject: bonding: fix data-races around agg_select_timer syzbot reported that two threads might write over agg_select_timer at the same time. Make agg_select_timer atomic to fix the races. 
BUG: KCSAN: data-race in bond_3ad_initiate_agg_selection / bond_3ad_state_machine_handler read to 0xffff8881242aea90 of 4 bytes by task 1846 on cpu 1: bond_3ad_state_machine_handler+0x99/0x2810 drivers/net/bonding/bond_3ad.c:2317 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 write to 0xffff8881242aea90 of 4 bytes by task 25910 on cpu 0: bond_3ad_initiate_agg_selection+0x18/0x30 drivers/net/bonding/bond_3ad.c:1998 bond_open+0x658/0x6f0 drivers/net/bonding/bond_main.c:3967 __dev_open+0x274/0x3a0 net/core/dev.c:1407 dev_open+0x54/0x190 net/core/dev.c:1443 bond_enslave+0xcef/0x3000 drivers/net/bonding/bond_main.c:1937 do_set_master net/core/rtnetlink.c:2532 [inline] do_setlink+0x94f/0x2500 net/core/rtnetlink.c:2736 __rtnl_newlink net/core/rtnetlink.c:3414 [inline] rtnl_newlink+0xfeb/0x13e0 net/core/rtnetlink.c:3529 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5594 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2496 __do_sys_sendmsg net/socket.c:2505 [inline] __se_sys_sendmsg net/socket.c:2503 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000050 -> 0x0000004f Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 25910 Comm: syz-executor.1 Tainted: G W 5.17.0-rc4-syzkaller-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jay Vosburgh Cc: Veaceslav Falico Signed-off-by: David S. Miller --- include/net/bond_3ad.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 38785d48baff..184105d68294 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -262,7 +262,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ struct bond_3ad_stats stats; - u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; -- cgit v1.2.3 From 5e50f5d4ff31e95599d695df1f0a4e7d2d6fef99 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Sat, 12 Feb 2022 18:59:21 +0100 Subject: security: add sctp_assoc_established hook security_sctp_assoc_established() is added to replace security_inet_conn_established() called in sctp_sf_do_5_1E_ca(), so that asoc can be accessed in security subsystem and save the peer secid to asoc->peer_secid. 
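For illustration, roughly how an LSM wires up and implements the new hook (the "example" module and function names are made up; the real user is SELinux, whose changes are outside this include-only excerpt):

    #include <linux/lsm_hooks.h>

    static int example_sctp_assoc_established(struct sctp_association *asoc,
                                              struct sk_buff *skb)
    {
            /* called with the COOKIE_ACK skb: derive the peer label from the
             * packet and cache it in the association (e.g. asoc->peer_secid) */
            return 0;
    }

    static struct security_hook_list example_hooks[] = {
            LSM_HOOK_INIT(sctp_assoc_established, example_sctp_assoc_established),
    };

    /* registered from the LSM's init with
     * security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks), "example"); */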
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Based-on-patch-by: Xin Long Reviewed-by: Xin Long Tested-by: Richard Haines Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/lsm_hooks.h | 5 +++++ include/linux/security.h | 8 ++++++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a5a724c308d8..45931d81ccc3 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3bf5c658bc44..419b5febc3ca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1046,6 +1046,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. + * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index 6d72772182c8..25b3ef71f495 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND -- cgit v1.2.3 From 8487614a8a8a70d44957a11693dbefd8350f402c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 2 Feb 2022 20:17:32 -0600 Subject: dt-bindings: rtc: sun6i: Add H616, R329, and D1 support These new RTC variants all have a single alarm, like the R40 variant. For the new SoCs, start requiring a complete list of input clocks. The H616 has three required clocks. The R329 also has three required clocks (but one is different), plus an optional crystal oscillator input. The D1 RTC is identical to the one in the R329. And since these new SoCs will have a well-defined output clock order as well, they do not need the clock-output-names property. 
Signed-off-by: Samuel Holland Reviewed-by: Rob Herring Reviewed-by: Maxime Ripard Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220203021736.13434-3-samuel@sholland.org --- include/dt-bindings/clock/sun6i-rtc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/dt-bindings/clock/sun6i-rtc.h (limited to 'include') diff --git a/include/dt-bindings/clock/sun6i-rtc.h b/include/dt-bindings/clock/sun6i-rtc.h new file mode 100644 index 000000000000..c845493e4d37 --- /dev/null +++ b/include/dt-bindings/clock/sun6i-rtc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ + +#ifndef _DT_BINDINGS_CLK_SUN6I_RTC_H_ +#define _DT_BINDINGS_CLK_SUN6I_RTC_H_ + +#define CLK_OSC32K 0 +#define CLK_OSC32K_FANOUT 1 +#define CLK_IOSC 2 + +#endif /* _DT_BINDINGS_CLK_SUN6I_RTC_H_ */ -- cgit v1.2.3 From b62a8486de3ab1d7c2353ec422b9cca3abfcfbcd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:52 +0000 Subject: elfcore: Replace CONFIG_{IA64, UML} checks with a new option As arm64 is about to introduce MTE-specific phdrs in the core dump, add a common CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS option currently selectable by UML_X86 and IA64. Signed-off-by: Catalin Marinas Cc: Eric Biederman Link: https://lore.kernel.org/r/20220131165456.2160675-2-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/linux/elfcore.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ -- cgit v1.2.3 From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:53 +0000 Subject: elf: Introduce the ARM MTE ELF segment type Memory tags will be dumped in the core file as segments with their own type. Discussions with the binutils and the generic ABI community settled on using new definitions in the PT_*PROC space (and to be documented in the processor-specific ABIs). Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). 
Not included in this patch since there is no upstream support but the CHERI/BSD community will also reserve: #define PT_ARM_MEMTAG_CHERI (PT_LOPROC + 0x2) #define PT_RISCV_MEMTAG_CHERI (PT_LOPROC + 0x3) Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * -- cgit v1.2.3 From f41b6be1ebdae452819551ed35a46e6fd32bf467 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:51 +0100 Subject: tee: remove unused tee_shm_pool_alloc_res_mem() None of the drivers in the TEE subsystem uses tee_shm_pool_alloc_res_mem() so remove the function. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 5e1533ee3785..6b0f0d01ebdf 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -278,36 +278,6 @@ static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) poolm->ops->destroy_poolmgr(poolm); } -/** - * struct tee_shm_pool_mem_info - holds information needed to create a shared - * memory pool - * @vaddr: Virtual address of start of pool - * @paddr: Physical address of start of pool - * @size: Size in bytes of the pool - */ -struct tee_shm_pool_mem_info { - unsigned long vaddr; - phys_addr_t paddr; - size_t size; -}; - -/** - * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved - * memory range - * @priv_info: Information for driver private shared memory pool - * @dmabuf_info: Information for dma-buf shared memory pool - * - * Start and end of pools will must be page aligned. - * - * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied - * in @dmabuf, others will use the range provided by @priv. - * - * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. - */ -struct tee_shm_pool * -tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, - struct tee_shm_pool_mem_info *dmabuf_info); - /** * tee_shm_pool_free() - Free a shared memory pool * @pool: The shared memory pool to free -- cgit v1.2.3 From 71cc47d4cc1f7a333584e0f2f7c863c71a6d3ced Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:52 +0100 Subject: tee: add tee_shm_alloc_user_buf() Adds a new function tee_shm_alloc_user_buf() for user mode allocations, replacing passing the flags TEE_SHM_MAPPED | TEE_SHM_DMA_BUF to tee_shm_alloc(). 
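As a rough sketch of the call-site change inside the TEE core (illustrative, not a literal hunk from this patch), the user-mode allocation path goes from

	shm = tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);

to

	shm = tee_shm_alloc_user_buf(ctx, size);

with the properties now implied by the function name rather than spelled out by the caller.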
Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 6b0f0d01ebdf..a4393c8c38f3 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2016 Linaro Limited */ #ifndef __TEE_DRV_H -- cgit v1.2.3 From d88e0493a054c9fe72ade41a42d42e958ee6503d Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:53 +0100 Subject: tee: simplify shm pool handling Replaces the shared memory pool based on two pools with a single pool. The alloc() function pointer in struct tee_shm_pool_ops gets another parameter, align. This makes it possible to make less than page aligned allocations from the optional reserved shared memory pool while still making user space allocations page aligned. Behaviour is in practice unchanged, now with only a single pool used for bookkeeping. The allocation algorithm in the static OP-TEE shared memory pool is changed from best-fit to first-fit since only the latter supports an alignment parameter. The best-fit algorithm was previously the default choice and not a conscious one. The optee and amdtee drivers are updated as needed to work with this changed pool handling. This also removes OPTEE_SHM_NUM_PRIV_PAGES which becomes obsolete with this change as the private pages can be mixed with the payload pages. The OP-TEE driver changes minimum alignment for argument struct from 8 bytes to 512 bytes. A typical OP-TEE private shm allocation is 224 bytes (argument struct with 6 parameters, needed for open session). So with an alignment of 512 we'll waste a bit more than 50%. Before this we had a single page reserved for this so worst case usage compared to that would be 3 pages instead of 1 page. However, this worst case only occurs if there is high pressure from multiple threads on the secure world. All in all this should scale up and down better than fixed boundaries.
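To illustrate what an alloc() op taking the new align parameter can look like for a reserved-memory pool, here is a sketch built on the genalloc first-fit-align helpers (linux/genalloc.h and linux/tee_drv.h assumed included); it mirrors the idea described above but is not a verbatim copy of the driver code:

	static int pool_op_alloc(struct tee_shm_pool *pool, struct tee_shm *shm,
				 size_t size, size_t align)
	{
		unsigned long va;
		struct gen_pool *genpool = pool->private_data;
		size_t a = max_t(size_t, align, BIT(genpool->min_alloc_order));
		struct genpool_data_align data = { .align = a };
		size_t s = roundup(size, a);

		/* First-fit with alignment; best-fit has no aligned variant. */
		va = gen_pool_alloc_algo(genpool, s, gen_pool_first_fit_align,
					 &data);
		if (!va)
			return -ENOMEM;

		memset((void *)va, 0, s);
		shm->kaddr = (void *)va;
		shm->paddr = gen_pool_virt_to_phys(genpool, va);
		shm->size = s;

		return 0;
	}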
Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 60 +++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a4393c8c38f3..ed641dc314bd 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015-2016 Linaro Limited + * Copyright (c) 2015-2022 Linaro Limited */ #ifndef __TEE_DRV_H @@ -221,62 +221,39 @@ struct tee_shm { }; /** - * struct tee_shm_pool_mgr - shared memory manager + * struct tee_shm_pool - shared memory pool * @ops: operations * @private_data: private data for the shared memory manager */ -struct tee_shm_pool_mgr { - const struct tee_shm_pool_mgr_ops *ops; +struct tee_shm_pool { + const struct tee_shm_pool_ops *ops; void *private_data; }; /** - * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * struct tee_shm_pool_ops - shared memory pool operations * @alloc: called when allocating shared memory * @free: called when freeing shared memory - * @destroy_poolmgr: called when destroying the pool manager + * @destroy_pool: called when destroying the pool */ -struct tee_shm_pool_mgr_ops { - int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, - size_t size); - void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); - void (*destroy_poolmgr)(struct tee_shm_pool_mgr *poolmgr); +struct tee_shm_pool_ops { + int (*alloc)(struct tee_shm_pool *pool, struct tee_shm *shm, + size_t size, size_t align); + void (*free)(struct tee_shm_pool *pool, struct tee_shm *shm); + void (*destroy_pool)(struct tee_shm_pool *pool); }; -/** - * tee_shm_pool_alloc() - Create a shared memory pool from shm managers - * @priv_mgr: manager for driver private shared memory allocations - * @dmabuf_mgr: manager for dma-buf shared memory allocations - * - * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied - * in @dmabuf, others will use the range provided by @priv. - * - * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. - */ -struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, - struct tee_shm_pool_mgr *dmabuf_mgr); - /* - * tee_shm_pool_mgr_alloc_res_mem() - Create a shm manager for reserved - * memory + * tee_shm_pool_alloc_res_mem() - Create a shm manager for reserved memory * @vaddr: Virtual address of start of pool * @paddr: Physical address of start of pool * @size: Size in bytes of the pool * - * @returns pointer to a 'struct tee_shm_pool_mgr' or an ERR_PTR on failure. - */ -struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, - phys_addr_t paddr, - size_t size, - int min_alloc_order); - -/** - * tee_shm_pool_mgr_destroy() - Free a shared memory manager + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. */ -static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) -{ - poolm->ops->destroy_poolmgr(poolm); -} +struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, size_t size, + int min_alloc_order); /** * tee_shm_pool_free() - Free a shared memory pool @@ -285,7 +262,10 @@ static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) * The must be no remaining shared memory allocated from this pool when * this function is called. 
*/ -void tee_shm_pool_free(struct tee_shm_pool *pool); +static inline void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + pool->ops->destroy_pool(pool); +} /** * tee_get_drvdata() - Return driver_data pointer -- cgit v1.2.3 From 5d41f1b3e3282909b6bbceacb9aebe1d3c849a49 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:54 +0100 Subject: tee: replace tee_shm_alloc() tee_shm_alloc() is replaced by three new functions, tee_shm_alloc_user_buf() - for user mode allocations, replacing passing the flags TEE_SHM_MAPPED | TEE_SHM_DMA_BUF tee_shm_alloc_kernel_buf() - for kernel mode allocations, slightly optimized compared to using the flags TEE_SHM_MAPPED | TEE_SHM_DMA_BUF. tee_shm_alloc_priv_buf() - primarily for TEE driver internal use. This also makes the interface easier to use as we can get rid of the somewhat hard to use flags parameter. The TEE subsystem and the TEE drivers are updated to use the new functions instead. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index ed641dc314bd..7f038f8787c7 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -273,21 +273,7 @@ static inline void tee_shm_pool_free(struct tee_shm_pool *pool) */ void *tee_get_drvdata(struct tee_device *teedev); -/** - * tee_shm_alloc() - Allocate shared memory - * @ctx: Context that allocates the shared memory - * @size: Requested size of shared memory - * @flags: Flags setting properties for the requested shared memory. - * - * Memory allocated as global shared memory is automatically freed when the - * TEE file pointer is closed. The @flags field uses the bits defined by - * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If - * TEE_SHM_DMA_BUF global shared memory will be allocated and associated - * with a dma-buf handle, else driver private memory. - * - * @returns a pointer to 'struct tee_shm' - */ -struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** -- cgit v1.2.3 From 056d3fed3d1ff3f5d699be337f048f9eed2befaf Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:56 +0100 Subject: tee: add tee_shm_register_{user,kernel}_buf() Adds the two new functions tee_shm_register_user_buf() and tee_shm_register_kernel_buf() which should be used instead of the old tee_shm_register(). This avoids having the caller supplying the flags parameter which exposes a bit more than desired of the internals of the TEE subsystem. 
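A minimal usage sketch for the kernel-side variant (hypothetical client-driver code; the tee_context setup and the actual invocation are assumed to exist elsewhere):

	void *buf;
	struct tee_shm *shm;

	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	shm = tee_shm_register_kernel_buf(ctx, buf, PAGE_SIZE);
	if (IS_ERR(shm)) {
		kfree(buf);
		return PTR_ERR(shm);
	}

	/* ... hand shm to the TEE via tee_client_invoke_func() ... */

	tee_shm_free(shm);
	kfree(buf);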
Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 7f038f8787c7..c9d2cc32a5ed 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -287,6 +287,8 @@ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); */ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, size_t length, u32 flags); +struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, + void *addr, size_t length); /** * tee_shm_is_registered() - Check if shared memory object in registered in TEE -- cgit v1.2.3 From 53e16519c2eccdb2e1b123405466a29aaea1132e Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:58 +0100 Subject: tee: replace tee_shm_register() tee_shm_register() is replaced by the previously introduced functions tee_shm_register_user_buf() and tee_shm_register_kernel_buf(). Since there are no external callers left we can remove tee_shm_register() and refactor the remains. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index c9d2cc32a5ed..a3b663ef0694 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -276,17 +276,6 @@ void *tee_get_drvdata(struct tee_device *teedev); struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); -/** - * tee_shm_register() - Register shared memory buffer - * @ctx: Context that registers the shared memory - * @addr: Address is userspace of the shared buffer - * @length: Length of the shared buffer - * @flags: Flags setting properties for the requested shared memory. - * - * @returns a pointer to 'struct tee_shm' - */ -struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, - size_t length, u32 flags); struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length); /** -- cgit v1.2.3 From a45ea4efa358577c623d7353a6ba9af3c17f6ca0 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Fri, 4 Feb 2022 10:33:59 +0100 Subject: tee: refactor TEE_SHM_* flags Removes the redundant TEE_SHM_DMA_BUF, TEE_SHM_EXT_DMA_BUF, TEE_SHM_MAPPED and TEE_SHM_KERNEL_MAPPED flags. TEE_SHM_REGISTER is renamed to TEE_SHM_DYNAMIC in order to better match its usage. Assigns new values to the remaining flags to avoid gaps. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a3b663ef0694..911cad324acc 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -20,14 +20,11 @@ * specific TEE driver.
*/ -#define TEE_SHM_MAPPED BIT(0) /* Memory mapped by the kernel */ -#define TEE_SHM_DMA_BUF BIT(1) /* Memory with dma-buf handle */ -#define TEE_SHM_EXT_DMA_BUF BIT(2) /* Memory with dma-buf handle */ -#define TEE_SHM_REGISTER BIT(3) /* Memory registered in secure world */ -#define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ -#define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ -#define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ -#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ +#define TEE_SHM_DYNAMIC BIT(0) /* Dynamic shared memory registered */ + /* in secure world */ +#define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ +#define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ +#define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ struct device; struct tee_device; @@ -280,13 +277,13 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length); /** - * tee_shm_is_registered() - Check if shared memory object in registered in TEE + * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind * @shm: Shared memory handle - * @returns true if object is registered in TEE + * @returns true if object is dynamic shared memory */ -static inline bool tee_shm_is_registered(struct tee_shm *shm) +static inline bool tee_shm_is_dynamic(struct tee_shm *shm) { - return shm && (shm->flags & TEE_SHM_REGISTER); + return shm && (shm->flags & TEE_SHM_DYNAMIC); } /** -- cgit v1.2.3 From 8d23a54f5beea59b560855fb571e5d73d783e0b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Feb 2022 19:02:12 +0200 Subject: net: bridge: switchdev: differentiate new VLANs from changed ones br_switchdev_port_vlan_add() currently emits a SWITCHDEV_PORT_OBJ_ADD event with a SWITCHDEV_OBJ_ID_PORT_VLAN for 2 distinct cases: - a struct net_bridge_vlan got created - an existing struct net_bridge_vlan was modified This makes it impossible for switchdev drivers to properly balance PORT_OBJ_ADD with PORT_OBJ_DEL events, so if we want to allow that to happen, we must provide a way for drivers to distinguish between a VLAN with changed flags and a new one. Annotate struct switchdev_obj_port_vlan with a "bool changed" that distinguishes the 2 cases above. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/switchdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d353793dfeb5..92cc763991e9 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -81,6 +81,13 @@ struct switchdev_obj_port_vlan { struct switchdev_obj obj; u16 flags; u16 vid; + /* If set, the notifier signifies a change of one of the following + * flags for a VLAN that already exists: + * - BRIDGE_VLAN_INFO_PVID + * - BRIDGE_VLAN_INFO_UNTAGGED + * Entries with BRIDGE_VLAN_INFO_BRENTRY unset are not notified at all. + */ + bool changed; }; #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ -- cgit v1.2.3 From c4076cdd21f8d68a96f1e7124bd8915c7e31a474 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Feb 2022 19:02:16 +0200 Subject: net: switchdev: introduce switchdev_handle_port_obj_{add,del} for foreign interfaces The switchdev_handle_port_obj_add() helper is good for replicating a port object on the lower interfaces of @dev, if that object was emitted on a bridge, or on a bridge port that is a LAG. 
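For context, this is roughly how a switchdev driver wires that helper into its blocking notifier today (a sketch with hypothetical foo_* names; foo_netdev_ops and the hardware programming stand in for the driver's real code):

	#include <linux/netdevice.h>
	#include <net/switchdev.h>

	static bool foo_port_dev_check(const struct net_device *dev)
	{
		return dev->netdev_ops == &foo_netdev_ops;	/* hypothetical */
	}

	static int foo_port_obj_add(struct net_device *dev, const void *ctx,
				    const struct switchdev_obj *obj,
				    struct netlink_ext_ack *extack)
	{
		if (obj->id != SWITCHDEV_OBJ_ID_PORT_VLAN)
			return -EOPNOTSUPP;

		/* program SWITCHDEV_OBJ_PORT_VLAN(obj)->vid into hardware here */
		return 0;
	}

	static int foo_switchdev_blocking_event(struct notifier_block *nb,
						unsigned long event, void *ptr)
	{
		struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
		int err;

		if (event == SWITCHDEV_PORT_OBJ_ADD) {
			err = switchdev_handle_port_obj_add(dev, ptr,
							    foo_port_dev_check,
							    foo_port_obj_add);
			return notifier_from_errno(err);
		}

		return NOTIFY_DONE;
	}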
However, drivers that use this helper limit themselves to a box from which they can no longer intercept port objects notified on neighbor ports ("foreign interfaces"). One such driver is DSA, where software bridging with foreign interfaces such as standalone NICs or Wi-Fi APs is an important use case. There, a VLAN installed on a neighbor bridge port roughly corresponds to a forwarding VLAN installed on the DSA switch's CPU port. To support this use case while also making use of the benefits of the switchdev_handle_* replication helper for port objects, introduce a new variant of these functions that crawls through the neighbor ports of @dev, in search of potentially compatible switchdev ports that are interested in the event. The strategy is identical to switchdev_handle_fdb_event_to_device(): if @dev wasn't a switchdev interface, then go one step upper, and recursively call this function on the bridge that this port belongs to. At the next recursion step, __switchdev_handle_port_obj_add() will iterate through the bridge's lower interfaces. Among those, some will be switchdev interfaces, and one will be the original @dev that we came from. To prevent infinite recursion, we must suppress reentry into the original @dev, and just call the @add_cb for the switchdev_interfaces. It looks like this: br0 / | \ / | \ / | \ swp0 swp1 eth0 1. __switchdev_handle_port_obj_add(eth0) -> check_cb(eth0) returns false -> eth0 has no lower interfaces -> eth0's bridge is br0 -> switchdev_lower_dev_find(br0, check_cb, foreign_dev_check_cb)) finds br0 2. __switchdev_handle_port_obj_add(br0) -> check_cb(br0) returns false -> netdev_for_each_lower_dev -> check_cb(swp0) returns true, so we don't skip this interface 3. __switchdev_handle_port_obj_add(swp0) -> check_cb(swp0) returns true, so we call add_cb(swp0) (back to netdev_for_each_lower_dev from 2) -> check_cb(swp1) returns true, so we don't skip this interface 4. __switchdev_handle_port_obj_add(swp1) -> check_cb(swp1) returns true, so we call add_cb(swp1) (back to netdev_for_each_lower_dev from 2) -> check_cb(eth0) returns false, so we skip this interface to avoid infinite recursion Note: eth0 could have been a LAG, and we don't want to suppress the recursion through its lowers if those exist, so when check_cb() returns false, we still call switchdev_lower_dev_find() to estimate whether there's anything worth a recursion beneath that LAG. Using check_cb() and foreign_dev_check_cb(), switchdev_lower_dev_find() not only figures out whether the lowers of the LAG are switchdev, but also whether they actively offload the LAG or not (whether the LAG is "foreign" to the switchdev interface or not). The port_obj_info->orig_dev is preserved across recursive calls, so switchdev drivers still know on which device was this notification originally emitted. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 92cc763991e9..c32e1c8f79ec 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -324,11 +324,26 @@ int switchdev_handle_port_obj_add(struct net_device *dev, int (*add_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack)); +int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)); int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), int (*del_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj)); +int switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)); int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, @@ -447,6 +462,18 @@ switchdev_handle_port_obj_add(struct net_device *dev, return 0; } +static inline int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)) +{ + return 0; +} + static inline int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -457,6 +484,18 @@ switchdev_handle_port_obj_del(struct net_device *dev, return 0; } +static inline int +switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)) +{ + return 0; +} + static inline int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, -- cgit v1.2.3 From 134ef2388e7f3271d13223decdb5e45b0f84489f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Feb 2022 19:02:17 +0200 Subject: net: dsa: add explicit support for host bridge VLANs Currently, DSA programs VLANs on shared (DSA and CPU) ports each time it does so on user ports. This is good for basic functionality but has several limitations: - the VLAN group which must reach the CPU may be radically different from the VLAN group that must be autonomously forwarded by the switch. 
In other words, the admin may want to isolate noisy stations and avoid traffic from them going to the control processor of the switch, where it would just waste useless cycles. The bridge already supports independent control of VLAN groups on bridge ports and on the bridge itself, and when VLAN-aware, it will drop packets in software anyway if their VID isn't added as a 'self' entry towards the bridge device. - Replaying host FDB entries may depend, for some drivers like mv88e6xxx, on replaying the host VLANs as well. The 2 VLAN groups are approximately the same in most regular cases, but there are corner cases when timing matters, and DSA's approximation of replicating VLANs on shared ports simply does not work. - If a user makes the bridge (implicitly the CPU port) join a VLAN by accident, there is no way for the CPU port to isolate itself from that noisy VLAN except by rebooting the system. This is because for each VLAN added on a user port, DSA will add it on shared ports too, but for each VLAN deletion on a user port, it will remain installed on shared ports, since DSA has no good indication of whether the VLAN is still in use or not. Now that the bridge driver emits well-balanced SWITCHDEV_OBJ_ID_PORT_VLAN addition and removal events, DSA has a simple and straightforward task of separating the bridge port VLANs (these have an orig_dev which is a DSA slave interface, or a LAG interface) from the host VLANs (these have an orig_dev which is a bridge interface), and to keep a simple reference count of each VID on each shared port. Forwarding VLANs must be installed on the bridge ports and on all DSA ports interconnecting them. We don't have a good view of the exact topology, so we simply install forwarding VLANs on all DSA ports, which is what has been done until now. Host VLANs must be installed primarily on the dedicated CPU port of each bridge port. More subtly, they must also be installed on upstream-facing and downstream-facing DSA ports that are connecting the bridge ports and the CPU. This ensures that the mv88e6xxx's problem (VID of host FDB entry may be absent from VTU) is still addressed even if that switch is in a cross-chip setup, and it has no local CPU port. Therefore: - user ports contain only bridge port (forwarding) VLANs, and no refcounting is necessary - DSA ports contain both forwarding and host VLANs. Refcounting is necessary among these 2 types. - CPU ports contain only host VLANs. Refcounting is also necessary. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index fd1f62a6e0a8..85cb9aed4c51 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -312,6 +312,10 @@ struct dsa_port { struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; + + /* List of VLANs that CPU and DSA ports are members of. 
*/ + struct mutex vlans_lock; + struct list_head vlans; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -332,6 +336,12 @@ struct dsa_mac_addr { struct list_head list; }; +struct dsa_vlan { + u16 vid; + refcount_t refcount; + struct list_head list; +}; + struct dsa_switch { struct device *dev; -- cgit v1.2.3 From a257cacc38718c83cee003487e03197f237f5c3f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:02 +0100 Subject: asm-generic: Define CONFIG_HAVE_FUNCTION_DESCRIPTORS Replace HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR by a config option named CONFIG_HAVE_FUNCTION_DESCRIPTORS and use it instead of 'dereference_function_descriptor' macro to know whether an arch has function descriptors. To limit churn in one of the following patches, use an #ifdef/#else construct with empty first part instead of an #ifndef in asm-generic/sections.h On powerpc, make sure the config option matches the ABI used by the compiler with a BUILD_BUG_ON() and add missing _CALL_ELF=2 when calling 'sparse' so that sparse sees the same piece of code as GCC. And include a helper to check whether an arch has function descriptors or not : have_function_descriptors() Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Nicholas Piggin Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a0f11fb0ea74a3197bc44dd7ba25e53a24fd03d.1644928018.git.christophe.leroy@csgroup.eu --- include/asm-generic/sections.h | 8 +++++++- include/linux/kallsyms.h | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764e1..3ef83e1aebee 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,11 +59,17 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif +static inline bool have_function_descriptors(void) +{ + return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); +} + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); -- cgit v1.2.3 From 0dc690e4ef5b901e9d4b53520854fbd5c749e09d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:03 +0100 Subject: asm-generic: Define 'func_desc_t' to commonly describe function descriptors We have three architectures using function descriptors, each with its own type and name. Add a common typedef that can be used in generic code. Also add a stub typedef for architecture without function descriptors, to avoid a forest of #ifdefs. 
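For illustration, generic code can then be written along these lines (a sketch; the helper name below is made up here and not added by this patch):

	static unsigned long func_entry_addr(unsigned long ptr)
	{
		if (have_function_descriptors()) {
			func_desc_t *desc = (func_desc_t *)ptr;

			return desc->addr;
		}

		return ptr;
	}

On architectures without descriptors the common typedef keeps the descriptor branch compiling, and have_function_descriptors() lets it be optimised away.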
It replaces the similar 'func_desc_t' previously defined in arch/powerpc/kernel/module_64.c Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Acked-by: Arnd Bergmann Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f1f91b142b3c1082bdc1586ce71c9bac1e75213c.1644928018.git.christophe.leroy@csgroup.eu --- include/asm-generic/sections.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 3ef83e1aebee..bbf97502470c 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -63,6 +63,11 @@ extern __visible const void __nosave_begin, __nosave_end; #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) + +/* An address is simply the address of the function. */ +typedef struct { + unsigned long addr; +} func_desc_t; #endif static inline bool have_function_descriptors(void) -- cgit v1.2.3 From e1478d8eaf27704db17a44dee4c53696ed01fc9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:04 +0100 Subject: asm-generic: Refactor dereference_[kernel]_function_descriptor() dereference_function_descriptor() and dereference_kernel_function_descriptor() are identical on the three architectures implementing them. Make them common and put them out-of-line in kernel/extable.c which is one of the users and has similar type of functions. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Arnd Bergmann Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/449db09b2eba57f4ab05f80102a67d8675bc8bcd.1644928018.git.christophe.leroy@csgroup.eu --- include/asm-generic/sections.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index bbf97502470c..d0f7bdd2fdf2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -60,6 +60,8 @@ extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr); +void *dereference_kernel_function_descriptor(void *ptr); #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) -- cgit v1.2.3 From ba2689234be92024e5635d30fe744f4853ad97db Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 18 Nov 2021 13:59:46 +0000 Subject: arm64: entry: Add vectors that have the bhb mitigation sequences Some CPUs affected by Spectre-BHB need a sequence of branches, or a firmware call to be run before any indirect branch. This needs to go in the vectors. No CPU needs both. While this can be patched in, it would run on all CPUs as there is a single set of vectors. If only one part of a big/little combination is affected, the unaffected CPUs have to run the mitigation too. Create extra vectors that include the sequence. Subsequent patches will allow affected CPUs to select this set of vectors. Later patches will modify the loop count to match what the CPU requires. 
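For illustration only (the mitigation itself is run from the assembly vectors, not from C), probing whether firmware implements the new call could look roughly like this sketch:

	#include <linux/arm-smccc.h>

	static bool smccc_has_arch_workaround_3(void)
	{
		struct arm_smccc_res res;

		if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
			return false;

		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
				     ARM_SMCCC_ARCH_WORKAROUND_3, &res);

		return (int)res.a0 >= 0;
	}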
Reviewed-by: Catalin Marinas Signed-off-by: James Morse --- include/linux/arm-smccc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 63ccb5252190..220c8c60e021 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -92,6 +92,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ -- cgit v1.2.3 From 08bc13d8efe30258850ecdc5d4f38c0bc07689c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Feb 2022 20:12:43 +0100 Subject: ieee80211: use tab to indent struct ieee80211_neighbor_ap_info Somehow spaces were used here, use tab instead. Link: https://lore.kernel.org/r/20220210201242.da8fa2e5ae8d.Ia452db01876e52e815f6337fef437049df0d8bd9@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 60ee7b3f58e7..a2940a853783 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4005,10 +4005,10 @@ static inline bool for_each_element_completed(const struct element *element, #define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP 0x40 struct ieee80211_neighbor_ap_info { - u8 tbtt_info_hdr; - u8 tbtt_info_len; - u8 op_class; - u8 channel; + u8 tbtt_info_hdr; + u8 tbtt_info_len; + u8 op_class; + u8 channel; } __packed; enum ieee80211_range_params_max_total_ltf { -- cgit v1.2.3 From d61f4274daa41565b5f9ce589b4ba1239781c139 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 14 Feb 2022 17:29:21 +0100 Subject: ieee80211: add helper to check HE capability element size This element has a very dynamic structure, create a small helper function to validate its size. We're currently checking it in mac80211 in a conversion function, but that's actually slightly buggy. 
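A sketch of the intended use when validating a received HE capabilities extension element (the surrounding ies buffer handling is assumed and not part of this patch):

	const struct element *elem;
	const struct ieee80211_he_cap_elem *he_cap = NULL;

	elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
	if (elem && elem->datalen >= 1 &&
	    ieee80211_he_capa_size_ok(elem->data + 1, elem->datalen - 1))
		he_cap = (const void *)(elem->data + 1);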
Link: https://lore.kernel.org/r/20220214172920.750bee9eaf37.Ie18359bd38143b7dc949078f10752413e6d36854@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a2940a853783..dfc84680af82 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2021 Intel Corporation + * Copyright (c) 2018 - 2022 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -2338,6 +2338,29 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) return n; } +static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_he_cap_elem *he_cap_ie_elem = (const void *)data; + u8 needed = sizeof(*he_cap_ie_elem); + + if (len < needed) + return false; + + needed += ieee80211_he_mcs_nss_size(he_cap_ie_elem); + if (len < needed) + return false; + + if (he_cap_ie_elem->phy_cap_info[6] & + IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) { + if (len < needed + 1) + return false; + needed += ieee80211_he_ppe_size(data[needed], + he_cap_ie_elem->phy_cap_info); + } + + return len >= needed; +} + /* HE Operation defines */ #define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000007 #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000008 -- cgit v1.2.3 From cbc1ca0a9d0a54cb8aa7c54c16e71599551e3f74 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:51 +0100 Subject: ieee80211: Add EHT (802.11be) definitions Based on Draft P802.11be_D1.4. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.928e23cacb2b.Id30a3ef2844b296efbd5486fe1da9ca36a95c5cf@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 299 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 299 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dfc84680af82..7204bc28ca31 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1925,6 +1926,111 @@ struct ieee80211_mu_edca_param_set { struct ieee80211_he_mu_edca_param_ac_rec ac_vo; } __packed; +#define IEEE80211_EHT_MCS_NSS_RX 0x0f +#define IEEE80211_EHT_MCS_NSS_TX 0xf0 + +/** + * struct ieee80211_eht_mcs_nss_supp_20mhz_only - EHT 20MHz only station max + * supported NSS for per MCS. + * + * For each field below, bits 0 - 3 indicate the maximal number of spatial + * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams + * for Tx. + * + * @rx_tx_mcs7_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 0 - 7. + * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 8 - 9. + * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 10 - 11. 
+ * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 12 - 13. + */ +struct ieee80211_eht_mcs_nss_supp_20mhz_only { + u8 rx_tx_mcs7_max_nss; + u8 rx_tx_mcs9_max_nss; + u8 rx_tx_mcs11_max_nss; + u8 rx_tx_mcs13_max_nss; +}; + +/** + * struct ieee80211_eht_mcs_nss_supp_bw - EHT max supported NSS per MCS (except + * 20MHz only stations). + * + * For each field below, bits 0 - 3 indicate the maximal number of spatial + * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams + * for Tx. + * + * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 0 - 9. + * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 10 - 11. + * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 12 - 13. + */ +struct ieee80211_eht_mcs_nss_supp_bw { + u8 rx_tx_mcs9_max_nss; + u8 rx_tx_mcs11_max_nss; + u8 rx_tx_mcs13_max_nss; +}; + +/** + * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data + * + * This structure is the "EHT Capabilities element" fixed fields as + * described in P802.11be_D1.4 section 9.4.2.313. + * + * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP* + * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP* + */ +struct ieee80211_eht_cap_elem_fixed { + u8 mac_cap_info[2]; + u8 phy_cap_info[9]; +} __packed; + +/** + * struct ieee80211_eht_cap_elem - EHT capabilities element + * @fixed: fixed parts, see &ieee80211_eht_cap_elem_fixed + * @optional: optional parts + */ +struct ieee80211_eht_cap_elem { + struct ieee80211_eht_cap_elem_fixed fixed; + + /* + * Followed by: + * Supported EHT-MCS And NSS Set field: 4, 3, 6 or 9 octets. + * EHT PPE Thresholds field: variable length. + */ + u8 optional[]; +} __packed; + +/** + * struct ieee80211_eht_operation - eht operation element + * + * This structure is the "EHT Operation Element" fields as + * described in P802.11be_D1.4 section 9.4.2.311 + * + * FIXME: The spec is unclear how big the fields are, and doesn't + * indicate the "Disabled Subchannel Bitmap Present" in the + * structure (Figure 9-1002a) at all ... 
+ */ +struct ieee80211_eht_operation { + u8 chan_width; + u8 ccfs; + u8 present_bm; + + u8 disable_subchannel_bitmap[]; +} __packed; + +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x1 + /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 @@ -2129,6 +2235,8 @@ enum ieee80211_client_reg_power { #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G 0x10 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL 0x1e + #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G 0x20 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G 0x40 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK 0xfe @@ -2622,6 +2730,194 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) +/* EHT MAC capabilities as defined in P802.11be_D1.4 section 9.4.2.313.2 */ +#define IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS 0x01 +#define IEEE80211_EHT_MAC_CAP0_OM_CONTROL 0x02 +#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 0x04 +#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 +#define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT 0x10 +#define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC 0x20 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_MASK 0xc0 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_3895 0 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_7991 1 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_11454 2 + +/* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ +#define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 +#define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ 0x04 +#define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI 0x08 +#define IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO 0x10 +#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER 0x20 +#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE 0x40 + +/* EHT beamformee number of spatial streams <= 80MHz is split */ +#define IEEE80211_EHT_PHY_CAP0_BEAMFORMEE_SS_80MHZ_MASK 0x80 +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK 0x03 + +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK 0x1c +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK 0xe0 + +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_80MHZ_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK 0x38 + +/* EHT number of sounding dimensions for 320MHz is split */ +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK 0xc0 +#define IEEE80211_EHT_PHY_CAP3_SOUNDING_DIM_320MHZ_MASK 0x01 +#define IEEE80211_EHT_PHY_CAP3_NG_16_SU_FEEDBACK 0x02 +#define IEEE80211_EHT_PHY_CAP3_NG_16_MU_FEEDBACK 0x04 +#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK 0x08 +#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK 0x10 +#define IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK 0x20 +#define IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK 0x40 +#define IEEE80211_EHT_PHY_CAP3_TRIG_CQI_FDBK 0x80 + +#define IEEE80211_EHT_PHY_CAP4_PART_BW_DL_MU_MIMO 0x01 +#define IEEE80211_EHT_PHY_CAP4_PSR_SR_SUPP 0x02 +#define IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP 0x04 +#define IEEE80211_EHT_PHY_CAP4_EHT_MU_PPDU_4_EHT_LTF_08_GI 0x08 +#define IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK 0xf0 + +#define IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK 0x01 +#define IEEE80211_EHT_PHY_CAP5_TX_LESS_242_TONE_RU_SUPP 0x02 +#define IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP 0x04 
+#define IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT 0x08 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK 0x30 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_0US 0 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_8US 1 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US 2 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US 3 + +/* Maximum number of supported EHT LTF is split */ +#define IEEE80211_EHT_PHY_CAP5_MAX_NUM_SUPP_EHT_LTF_MASK 0xc0 +#define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 + +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 +#define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 + +#define IEEE80211_EHT_PHY_CAP7_20MHZ_STA_RX_NDP_WIDER_BW 0x01 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ 0x02 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ 0x04 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ 0x10 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ 0x20 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ 0x40 +#define IEEE80211_EHT_PHY_CAP7_TB_SOUNDING_FDBK_RATE_LIMIT 0x80 + +#define IEEE80211_EHT_PHY_CAP8_RX_1024QAM_WIDER_BW_DL_OFDMA 0x01 +#define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 + +/* + * EHT operation channel width as defined in P802.11be_D1.4 section 9.4.2.311 + */ +#define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ 1 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ 2 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ 3 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ 4 + +/* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */ +static inline u8 +ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, + const struct ieee80211_eht_cap_elem_fixed *eht_cap) +{ + u8 count = 0; + + /* on 2.4 GHz, if it supports 40 MHz, the result is 3 */ + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) + return 3; + + /* on 2.4 GHz, these three bits are reserved, so should be 0 */ + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) + count += 3; + + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) + count += 3; + + if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) + count += 3; + + return count ? count : 4; +} + +/* 802.11be EHT PPE Thresholds */ +#define IEEE80211_EHT_PPE_THRES_NSS_POS 0 +#define IEEE80211_EHT_PPE_THRES_NSS_MASK 0xf +#define IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK 0x1f0 +#define IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE 3 +#define IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE 9 + +/* + * Calculate 802.11be EHT capabilities IE EHT field size + */ +static inline u8 +ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info) +{ + u32 n; + + if (!(phy_cap_info[5] & + IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT)) + return 0; + + n = hweight16(ppe_thres_hdr & + IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK); + n *= 1 + u16_get_bits(ppe_thres_hdr, IEEE80211_EHT_PPE_THRES_NSS_MASK); + + /* + * Each pair is 6 bits, and we need to add the 9 "header" bits to the + * total size. 
+ */ + n = n * IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2 + + IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE; + return DIV_ROUND_UP(n, 8); +} + +static inline bool +ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len) +{ + const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data; + u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed); + + if (len < needed || !he_capa) + return false; + + needed += ieee80211_eht_mcs_nss_size((const void *)he_capa, + (const void *)data); + if (len < needed) + return false; + + if (elem->phy_cap_info[5] & + IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) { + u16 ppe_thres_hdr; + + if (len < needed + sizeof(ppe_thres_hdr)) + return false; + + ppe_thres_hdr = get_unaligned_le16(data + needed); + needed += ieee80211_eht_ppe_size(ppe_thres_hdr, + elem->phy_cap_info); + } + + return len >= needed; +} + +static inline bool +ieee80211_eht_oper_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_eht_operation *elem = (const void *)data; + u8 needed = sizeof(*elem); + + if (len < needed) + return false; + + if (elem->present_bm & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) + needed += 2; + + return len >= needed; +} #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3077,6 +3373,9 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_SHORT_SSID_LIST = 58, WLAN_EID_EXT_HE_6GHZ_CAPA = 59, WLAN_EID_EXT_UL_MU_POWER_CAPA = 60, + WLAN_EID_EXT_EHT_OPERATION = 106, + WLAN_EID_EXT_EHT_MULTI_LINK = 107, + WLAN_EID_EXT_EHT_CAPABILITY = 108, }; /* Action category code */ -- cgit v1.2.3 From 2a2c86f15e17c5013b9897b67d895e64a25ae3cb Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Mon, 14 Feb 2022 17:29:52 +0100 Subject: ieee80211: add EHT 1K aggregation definitions We add the fields for parsing extended ADDBA request/respond, and new max 1K aggregation for limit ADDBA request/respond. Adjust drivers to use the proper macro, IEEE80211_MAX_AMPDU_BUF -> IEEE80211_MAX_AMPDU_BUF_HE. Signed-off-by: Mordechay Goodstein Link: https://lore.kernel.org/r/20220214173004.b8b447ce95b7.I0ee2554c94e89abc7a752b0f7cc7fd79c273efea@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7204bc28ca31..a4143466cb32 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1024,6 +1024,8 @@ struct ieee80211_tpc_report_ie { #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1) #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1 #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0) +#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5) +#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10 struct ieee80211_addba_ext_ie { u8 data; @@ -1698,10 +1700,12 @@ struct ieee80211_ht_operation { * A-MPDU buffer sizes * According to HT size varies from 8 to 64 frames * HE adds the ability to have up to 256 frames. + * EHT adds the ability to have up to 1K frames. */ #define IEEE80211_MIN_AMPDU_BUF 0x8 #define IEEE80211_MAX_AMPDU_BUF_HT 0x40 -#define IEEE80211_MAX_AMPDU_BUF 0x100 +#define IEEE80211_MAX_AMPDU_BUF_HE 0x100 +#define IEEE80211_MAX_AMPDU_BUF_EHT 0x400 /* Spatial Multiplexing Power Save Modes (for capability) */ -- cgit v1.2.3 From 5cd5a8a3e2fb11b1c8a09f062c44c1e228ef987a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:53 +0100 Subject: cfg80211: Add data structures to capture EHT capabilities And advertise EHT capabilities to user space when supported. 
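A sketch of how the new capability data might be consumed once a wiphy advertises it (the sband and iftype context is assumed; the identifiers are the ones introduced in this series):

	const struct ieee80211_sta_eht_cap *eht_cap;

	eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_STATION);
	if (eht_cap &&
	    (eht_cap->eht_cap_elem.phy_cap_info[0] &
	     IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)) {
		/* the local device can use 320 MHz EHT channels on 6 GHz */
	}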
Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.6fb70658529f.I2413a37c8f7d2d6d638038a3d95360a3fce0114d@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 63 ++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 12 +++++++++ 2 files changed, 75 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f6db085ff3df..7bec15f605be 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -360,6 +360,48 @@ struct ieee80211_sta_he_cap { u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN]; }; +/** + * struct ieee80211_eht_mcs_nss_supp - EHT max supported NSS per MCS + * + * See P802.11be_D1.3 Table 9-401k - "Subfields of the Supported EHT-MCS + * and NSS Set field" + * + * @only_20mhz: MCS/NSS support for 20 MHz-only STA. + * @bw._80: MCS/NSS support for BW <= 80 MHz + * @bw._160: MCS/NSS support for BW = 160 MHz + * @bw._320: MCS/NSS support for BW = 320 MHz + */ +struct ieee80211_eht_mcs_nss_supp { + union { + struct ieee80211_eht_mcs_nss_supp_20mhz_only only_20mhz; + struct { + struct ieee80211_eht_mcs_nss_supp_bw _80; + struct ieee80211_eht_mcs_nss_supp_bw _160; + struct ieee80211_eht_mcs_nss_supp_bw _320; + } __packed bw; + } __packed; +} __packed; + +#define IEEE80211_EHT_PPE_THRES_MAX_LEN 32 + +/** + * struct ieee80211_sta_eht_cap - STA's EHT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11be EHT capabilities for a STA. + * + * @has_eht: true iff EHT data is valid. + * @eht_cap_elem: Fixed portion of the eht capabilities element. + * @eht_mcs_nss_supp: The supported NSS/MCS combinations. + * @eht_ppe_thres: Holds the PPE Thresholds data. + */ +struct ieee80211_sta_eht_cap { + bool has_eht; + struct ieee80211_eht_cap_elem_fixed eht_cap_elem; + struct ieee80211_eht_mcs_nss_supp eht_mcs_nss_supp; + u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; +}; + /** * struct ieee80211_sband_iftype_data - sband data per interface type * @@ -379,6 +421,7 @@ struct ieee80211_sband_iftype_data { u16 types_mask; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; struct { const u8 *data; unsigned int len; @@ -561,6 +604,26 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, return data->he_6ghz_capa.capa; } +/** + * ieee80211_get_eht_iftype_cap - return ETH capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype + * + * Return: pointer to the struct ieee80211_sta_eht_cap, or NULL is none found + */ +static inline const struct ieee80211_sta_eht_cap * +ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband, + enum nl80211_iftype iftype) +{ + const struct ieee80211_sband_iftype_data *data = + ieee80211_get_sband_iftype_data(sband, iftype); + + if (data && data->eht_cap.has_eht) + return &data->eht_cap; + + return NULL; +} + /** * wiphy_read_of_freq_limits - read frequency limits from device tree * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 195a238a322e..d305a8b8c536 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3766,6 +3766,14 @@ enum nl80211_mpath_info { * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC: EHT MAC capabilities as in EHT + * capabilities 
element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY: EHT PHY capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET: EHT supported NSS/MCS as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as + * defined in EHT capabilities element * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ @@ -3779,6 +3787,10 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From 3743bec6120ae0748cb3fda6ff80a690117ef1f3 Mon Sep 17 00:00:00 2001 From: Jia Ding Date: Mon, 14 Feb 2022 17:29:54 +0100 Subject: cfg80211: Add support for EHT 320 MHz channel width Add 320 MHz support in the channel def and center frequency validation with compatible check. Signed-off-by: Jia Ding Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Muna Sinada Signed-off-by: Muna Sinada Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-5-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++++- include/uapi/linux/nl80211.h | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bec15f605be..a27f2d699dac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -109,7 +109,11 @@ struct wiphy; * on this channel. * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted * on this channel. - * + * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band, + * this flag indicates that a 320 MHz channel cannot use this + * channel as the control or any of the secondary channels. + * This may be due to the driver or due to regulatory bandwidth + * restrictions. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -131,6 +135,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_4MHZ = 1<<16, IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, + IEEE80211_CHAN_NO_320MHZ = 1<<19, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d305a8b8c536..9e05973f3f56 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4684,6 +4684,8 @@ enum nl80211_key_mode { * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel + * @NL80211_CHAN_WIDTH_320: 320 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -4699,6 +4701,7 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_4, NL80211_CHAN_WIDTH_8, NL80211_CHAN_WIDTH_16, + NL80211_CHAN_WIDTH_320, }; /** -- cgit v1.2.3 From cfb14110acf87b4db62e07ba08a80429f1749f40 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 14 Feb 2022 17:29:55 +0100 Subject: nl80211: add EHT MCS support Add support for reporting and calculating EHT bitrates. 
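For illustration, a driver reporting an EHT bitrate would fill struct rate_info roughly as below; the numbers are made up, the fields and flags are the ones this patch adds:

	struct rate_info ri = {};

	ri.flags = RATE_INFO_FLAGS_EHT_MCS;
	ri.bw = RATE_INFO_BW_320;
	ri.mcs = 11;			/* EHT MCS 0..15 */
	ri.nss = 2;
	ri.eht_gi = NL80211_RATE_INFO_EHT_GI_0_8;

cfg80211 would then translate this into the new NL80211_RATE_INFO_EHT_* attributes when filling station info for user space.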
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-7-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 ++++++++ include/uapi/linux/nl80211.h | 62 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a27f2d699dac..f35ffd81d213 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1595,6 +1595,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS + * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1604,6 +1605,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), + RATE_INFO_FLAGS_EHT_MCS = BIT(7), }; /** @@ -1618,6 +1620,8 @@ enum rate_info_flags { * @RATE_INFO_BW_80: 80 MHz bandwidth * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation + * @RATE_INFO_BW_320: 320 MHz bandwidth + * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation */ enum rate_info_bw { RATE_INFO_BW_20 = 0, @@ -1627,6 +1631,8 @@ enum rate_info_bw { RATE_INFO_BW_80, RATE_INFO_BW_160, RATE_INFO_BW_HE_RU, + RATE_INFO_BW_320, + RATE_INFO_BW_EHT_RU, }; /** @@ -1644,6 +1650,9 @@ enum rate_info_bw { * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only valid if bw is %RATE_INFO_BW_HE_RU) * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) + * @eht_gi: EHT guard interval (from &enum nl80211_eht_gi) + * @eht_ru_alloc: EHT RU allocation (from &enum nl80211_eht_ru_alloc, + * only valid if bw is %RATE_INFO_BW_EHT_RU) */ struct rate_info { u8 flags; @@ -1655,6 +1664,8 @@ struct rate_info { u8 he_dcm; u8 he_ru_alloc; u8 n_bonded_ch; + u8 eht_gi; + u8 eht_ru_alloc; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9e05973f3f56..d0ba70ea5d04 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3392,6 +3392,56 @@ enum nl80211_he_ru_alloc { NL80211_RATE_INFO_HE_RU_ALLOC_2x996, }; +/** + * enum nl80211_eht_gi - EHT guard interval + * @NL80211_RATE_INFO_EHT_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_EHT_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_EHT_GI_3_2: 3.2 usec + */ +enum nl80211_eht_gi { + NL80211_RATE_INFO_EHT_GI_0_8, + NL80211_RATE_INFO_EHT_GI_1_6, + NL80211_RATE_INFO_EHT_GI_3_2, +}; + +/** + * enum nl80211_eht_ru_alloc - EHT RU allocation values + * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52P26: 52+26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106P26: 106+26 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484P242: 484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484: 996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242: 996+484+242 tone RU allocation + * 
@NL80211_RATE_INFO_EHT_RU_ALLOC_2x996: 2x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484: 2x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996: 3x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484: 3x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_4x996: 4x996-tone RU allocation + */ +enum nl80211_eht_ru_alloc { + NL80211_RATE_INFO_EHT_RU_ALLOC_26, + NL80211_RATE_INFO_EHT_RU_ALLOC_52, + NL80211_RATE_INFO_EHT_RU_ALLOC_52P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_106, + NL80211_RATE_INFO_EHT_RU_ALLOC_106P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_242, + NL80211_RATE_INFO_EHT_RU_ALLOC_484, + NL80211_RATE_INFO_EHT_RU_ALLOC_484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_996, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_4x996, +}; + /** * enum nl80211_rate_info - bitrate information * @@ -3431,6 +3481,13 @@ enum nl80211_he_ru_alloc { * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) + * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate + * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15) + * @NL80211_RATE_INFO_EHT_NSS: EHT NSS value (u8, 1-8) + * @NL80211_RATE_INFO_EHT_GI: EHT guard interval identifier + * (u8, see &enum nl80211_eht_gi) + * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc) * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -3452,6 +3509,11 @@ enum nl80211_rate_info { NL80211_RATE_INFO_HE_GI, NL80211_RATE_INFO_HE_DCM, NL80211_RATE_INFO_HE_RU_ALLOC, + NL80211_RATE_INFO_320_MHZ_WIDTH, + NL80211_RATE_INFO_EHT_MCS, + NL80211_RATE_INFO_EHT_NSS, + NL80211_RATE_INFO_EHT_GI, + NL80211_RATE_INFO_EHT_RU_ALLOC, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, -- cgit v1.2.3 From c2b3d7699fb0ce66538b829af43970acc2f89060 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Mon, 14 Feb 2022 17:29:56 +0100 Subject: nl80211: add support for 320MHz channel limitation Add support to advertise drivers or regulatory limitations on 320 MHz channels to userspace. Signed-off-by: Sriram R Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-6-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-3-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d0ba70ea5d04..6a338dafcd07 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3997,6 +3997,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed * on this channel in current regulatory domain. 
+ * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4033,6 +4035,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_4MHZ, NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, + NL80211_FREQUENCY_ATTR_NO_320MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4231,6 +4234,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed * @NL80211_RRF_NO_HE: HE operation not allowed + * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4249,6 +4253,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_80MHZ = 1<<15, NL80211_RRF_NO_160MHZ = 1<<16, NL80211_RRF_NO_HE = 1<<17, + NL80211_RRF_NO_320MHZ = 1<<18, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 31846b657857e6a73d982604f36a34710d98902c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:57 +0100 Subject: cfg80211: add NO-EHT flag to regulatory This may be necessary in some cases, add a flag and propagate it, just like the NO-HE that already exists. Signed-off-by: Ilan Peer [split off from a combined 320/no-EHT patch] Link: https://lore.kernel.org/r/20220214173004.dbb85a7b86bb.Ifc1e2daac51c1cc5f895ccfb79faf5eaec3950ec@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f35ffd81d213..5cfc483dece1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -114,6 +114,7 @@ struct wiphy; * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. + * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -136,6 +137,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, IEEE80211_CHAN_NO_320MHZ = 1<<19, + IEEE80211_CHAN_NO_EHT = 1<<20, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6a338dafcd07..baf6433c0119 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3999,6 +3999,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel * as the primary or any of the secondary channels isn't possible + * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel + * in current regulatory domain. 
* @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4036,6 +4038,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, NL80211_FREQUENCY_ATTR_NO_320MHZ, + NL80211_FREQUENCY_ATTR_NO_EHT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From ea05fd3581d32a0f1098657005c7a9b763798fe8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:58 +0100 Subject: cfg80211: Support configuration of station EHT capabilities Add attributes and some code bits to support userspace passing in EHT capabilities of stations. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.ecf0b3ff9627.Icb4a5f2ec7b41d9008ac4cfc16c59baeb84793d3@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5cfc483dece1..68713388b617 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1487,6 +1487,8 @@ struct sta_txpwr { * @airtime_weight: airtime scheduler weight for this station * @txpwr: transmit power for an associated station * @he_6ghz_capa: HE 6 GHz Band capabilities of station + * @eht_capa: EHT capabilities of station + * @eht_capa_len: the length of the EHT capabilities */ struct station_parameters { const u8 *supported_rates; @@ -1520,6 +1522,8 @@ struct station_parameters { u16 airtime_weight; struct sta_txpwr txpwr; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + const struct ieee80211_eht_cap_elem *eht_capa; + u8 eht_capa_len; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index baf6433c0119..98ed52663d6b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2659,6 +2659,10 @@ enum nl80211_commands { * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be * used with %NL80211_CMD_START_AP request. * + * @NL80211_ATTR_EHT_CAPABILITY: EHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3169,6 +3173,8 @@ enum nl80211_attrs { NL80211_ATTR_AP_SETTINGS_FLAGS, + NL80211_ATTR_EHT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3224,6 +3230,8 @@ enum nl80211_attrs { #define NL80211_HE_MAX_CAPABILITY_LEN 54 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 +#define NL80211_EHT_MIN_CAPABILITY_LEN 13 +#define NL80211_EHT_MAX_CAPABILITY_LEN 51 #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 -- cgit v1.2.3 From 5dca295dd76756c7918ad7113fc4a3cf8262ed43 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:30:00 +0100 Subject: mac80211: Add initial support for EHT and 320 MHz channels Add initial support for EHT and 320 MHz bandwidth in mac80211. As a new IEEE80211_STA_RX_BW_320 is added to enum ieee80211_sta_rx_bandwidth, update the drivers to avoid compilation warnings. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.0f144cc0bba6.Iad18111264da87eed5fd7b017f0cc6e58c604e07@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bd6912d0292b..586d3c26c8ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2005,6 +2005,7 @@ enum ieee80211_sta_state { * @IEEE80211_STA_RX_BW_80: station can receive up to 80 MHz * @IEEE80211_STA_RX_BW_160: station can receive up to 160 MHz * (including 80+80 MHz) + * @IEEE80211_STA_RX_BW_320: station can receive up to 320 MHz * * Implementation note: 20 must be zero to be initialized * correctly, the values must be sorted. @@ -2014,6 +2015,7 @@ enum ieee80211_sta_rx_bandwidth { IEEE80211_STA_RX_BW_40, IEEE80211_STA_RX_BW_80, IEEE80211_STA_RX_BW_160, + IEEE80211_STA_RX_BW_320, }; /** -- cgit v1.2.3 From a1de64078bf7a3fed856fef5334132ba1963b683 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:30:02 +0100 Subject: mac80211: Handle station association response with EHT When the association is an EHT association, parse the EHT element from the association response and update the station's EHT capabilities accordingly. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.f33574718755.I21182234c5303d9423eabd5eb997e7cf75f8e0c8@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 586d3c26c8ac..f118b8fe667a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -636,6 +636,7 @@ struct ieee80211_fils_discovery { * @tx_pwr_env: transmit power envelope array of BSS. * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. 
+ * @eht_support: does this BSS support EHT */ struct ieee80211_bss_conf { const u8 *bssid; @@ -710,6 +711,7 @@ struct ieee80211_bss_conf { struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; u8 tx_pwr_env_num; u8 pwr_reduction; + bool eht_support; }; /** @@ -2071,6 +2073,7 @@ struct ieee80211_sta_txpwr { * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities * @he_cap: HE capabilities of this STA * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities + * @eht_cap: EHT capabilities of this STA * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU * that this station is allowed to transmit to us. * Can be modified by driver. @@ -2111,6 +2114,7 @@ struct ieee80211_sta { struct ieee80211_sta_vht_cap vht_cap; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; u16 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; -- cgit v1.2.3 From e6df4ead85d9da1b07dd40bd4c6d2182f3e210c4 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Tue, 25 Jan 2022 14:56:58 +0800 Subject: psi: fix possible trigger missing in the window When a new threshold breaching stall happens after a psi event was generated and within the window duration, the new event is not generated because the events are rate-limited to one per window. If after that no new stall is recorded then the event will not be generated even after rate-limiting duration has passed. This is happening because with no new stall, window_update will not be called even though threshold was previously breached. To fix this, record threshold breaching occurrence and generate the event once window duration is passed. Suggested-by: Suren Baghdasaryan Signed-off-by: Zhaoyang Huang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Acked-by: Suren Baghdasaryan Link: https://lore.kernel.org/r/1643093818-19835-1-git-send-email-huangzhaoyang@gmail.com --- include/linux/psi_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 516c0fe836fd..dc3ec5e4b9ee 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -144,6 +144,9 @@ struct psi_trigger { /* Refcounting to prevent premature destruction */ struct kref refcount; + + /* Deferred event(s) from previous ratelimit window */ + bool pending_event; }; struct psi_group { -- cgit v1.2.3 From 04d4e665a60902cf36e7ad39af1179cb5df542ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Feb 2022 16:59:06 +0100 Subject: sched/isolation: Use single feature type while referring to housekeeping cpumask Refer to housekeeping APIs using single feature types instead of flags. This prevents from passing multiple isolation features at once to housekeeping interfaces, which soon won't be possible anymore as each isolation features will have their own cpumask. 
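The conversion at call sites is mechanical; an illustrative before/after (not a hunk from this patch, which is limited to the header) looks like:

	/* before: bitmask flag */
	cpu = housekeeping_any_cpu(HK_FLAG_TIMER);

	/* after: single feature type */
	cpu = housekeeping_any_cpu(HK_TYPE_TIMER);

Since housekeeping_any_cpu(), housekeeping_cpumask() and the other helpers now take a single enum hk_type, callers can no longer OR several isolation features into one lookup, which is exactly the property the upcoming per-feature cpumasks rely on.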
Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juri Lelli Reviewed-by: Phil Auld Link: https://lore.kernel.org/r/20220207155910.527133-5-frederic@kernel.org --- include/linux/sched/isolation.h | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index cc9f393e2a70..8c15abd67aed 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,54 +5,55 @@ #include #include -enum hk_flags { - HK_FLAG_TIMER = 1, - HK_FLAG_RCU = (1 << 1), - HK_FLAG_MISC = (1 << 2), - HK_FLAG_SCHED = (1 << 3), - HK_FLAG_TICK = (1 << 4), - HK_FLAG_DOMAIN = (1 << 5), - HK_FLAG_WQ = (1 << 6), - HK_FLAG_MANAGED_IRQ = (1 << 7), - HK_FLAG_KTHREAD = (1 << 8), +enum hk_type { + HK_TYPE_TIMER, + HK_TYPE_RCU, + HK_TYPE_MISC, + HK_TYPE_SCHED, + HK_TYPE_TICK, + HK_TYPE_DOMAIN, + HK_TYPE_WQ, + HK_TYPE_MANAGED_IRQ, + HK_TYPE_KTHREAD, + HK_TYPE_MAX }; #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); -extern int housekeeping_any_cpu(enum hk_flags flags); -extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); -extern bool housekeeping_enabled(enum hk_flags flags); -extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); -extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); +extern int housekeeping_any_cpu(enum hk_type type); +extern const struct cpumask *housekeeping_cpumask(enum hk_type type); +extern bool housekeeping_enabled(enum hk_type type); +extern void housekeeping_affine(struct task_struct *t, enum hk_type type); +extern bool housekeeping_test_cpu(int cpu, enum hk_type type); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(enum hk_flags flags) +static inline int housekeeping_any_cpu(enum hk_type type) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) +static inline const struct cpumask *housekeeping_cpumask(enum hk_type type) { return cpu_possible_mask; } -static inline bool housekeeping_enabled(enum hk_flags flags) +static inline bool housekeeping_enabled(enum hk_type type) { return false; } static inline void housekeeping_affine(struct task_struct *t, - enum hk_flags flags) { } + enum hk_type type) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) +static inline bool housekeeping_cpu(int cpu, enum hk_type type) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overridden)) - return housekeeping_test_cpu(cpu, flags); + return housekeeping_test_cpu(cpu, type); #endif return true; } -- cgit v1.2.3 From fe65deb56e552a8c9bf7f27860dbdeac12a36116 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Feb 2022 01:57:16 +0900 Subject: jump_label: Avoid unneeded casts in STATIC_KEY_INIT_{TRUE,FALSE} Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") introduced the union to struct static_key. It is more natual to set JUMP_TYPE_* to the .type field without casting. 
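For context, the union introduced by commit 3821fd35b58d (also visible in the hunk of the following patch) overlays the type bits with the entries pointer, roughly:

struct static_key {
	atomic_t enabled;
	union {
		unsigned long type;
		struct jump_entry *entries;
		struct static_key_mod *next;
	};
};

so assigning JUMP_TYPE_TRUE or JUMP_TYPE_FALSE directly to .type initialises the same storage that the old (void *) casts into .entries did, without the cast.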
Signed-off-by: Masahiro Yamada Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220213165717.2354046-1-masahiroy@kernel.org --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 48b9b2a82767..6924e6837e6d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -251,10 +251,10 @@ extern void static_key_disable_cpuslocked(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - { .entries = (void *)JUMP_TYPE_TRUE } } + { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - { .entries = (void *)JUMP_TYPE_FALSE } } + { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ -- cgit v1.2.3 From cd27ccfc727e99352321c0c75012ab9c5a90321e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Feb 2022 01:57:17 +0900 Subject: jump_label: Refactor #ifdef of struct static_key Move #ifdef CONFIG_JUMP_LABEL inside the struct static_key. Signed-off-by: Masahiro Yamada Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220213165717.2354046-2-masahiroy@kernel.org --- include/linux/jump_label.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 6924e6837e6d..107751cc047b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -82,10 +82,9 @@ extern bool static_key_initialized; "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) -#ifdef CONFIG_JUMP_LABEL - struct static_key { atomic_t enabled; +#ifdef CONFIG_JUMP_LABEL /* * Note: * To make anonymous unions work with old compilers, the static @@ -104,13 +103,9 @@ struct static_key { struct jump_entry *entries; struct static_key_mod *next; }; +#endif /* CONFIG_JUMP_LABEL */ }; -#else -struct static_key { - atomic_t enabled; -}; -#endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL -- cgit v1.2.3 From e1be43d9b5d0d1310dbd90185a8e5c7145dde40f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 18 Sep 2021 15:17:53 -0700 Subject: overflow: Implement size_t saturating arithmetic helpers In order to perform more open-coded replacements of common allocation size arithmetic, the kernel needs saturating (SIZE_MAX) helpers for multiplication, addition, and subtraction. For example, it is common in allocators, especially on realloc, to add to an existing size: p = krealloc(map->patch, sizeof(struct reg_sequence) * (map->patch_regs + num_regs), GFP_KERNEL); There is no existing saturating replacement for this calculation, and just leaving the addition open coded inside array_size() could potentially overflow as well. For example, an overflow in an expression for a size_t argument might wrap to zero: array_size(anything, something_at_size_max + 1) == 0 Introduce size_mul(), size_add(), and size_sub() helpers that implicitly promote arguments to size_t and saturated calculations for use in allocations. With these helpers it is also possible to redefine array_size(), array3_size(), flex_array_size(), and struct_size() in terms of the new helpers. As with the check_*_overflow() helpers, the new helpers use __must_check, though what is really desired is a way to make sure that assignment is only to a size_t lvalue. Without this, it's still possible to introduce overflow/underflow via type conversion (i.e. from size_t to int). 
Enforcing this will currently need to be left to static analysis or future use of -Wconversion. Additionally update the overflow unit tests to force runtime evaluation for the pathological cases. Cc: Rasmus Villemoes Cc: Gustavo A. R. Silva Cc: Nathan Chancellor Cc: Jason Gunthorpe Cc: Nick Desaulniers Cc: Leon Romanovsky Cc: Keith Busch Cc: Len Baker Signed-off-by: Kees Cook --- include/linux/overflow.h | 110 +++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 4669632bd72b..59d7228104d0 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -118,81 +118,94 @@ static inline bool __must_check __must_check_overflow(bool overflow) })) /** - * array_size() - Calculate size of 2-dimensional array. - * - * @a: dimension one - * @b: dimension two + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * - * Calculates size of 2-dimensional array: @a * @b. + * @factor1: first factor + * @factor2: second factor * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t array_size(size_t a, size_t b) +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) + if (check_mul_overflow(factor1, factor2, &bytes)) return SIZE_MAX; return bytes; } /** - * array3_size() - Calculate size of 3-dimensional array. + * size_add() - Calculate size_t addition with saturation at SIZE_MAX * - * @a: dimension one - * @b: dimension two - * @c: dimension three - * - * Calculates size of 3-dimensional array: @a * @b * @c. + * @addend1: first addend + * @addend2: second addend * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @addend1 + @addend2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_mul_overflow(bytes, c, &bytes)) + if (check_add_overflow(addend1, addend2, &bytes)) return SIZE_MAX; return bytes; } -/* - * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for - * struct_size() below. +/** + * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX + * + * @minuend: value to subtract from + * @subtrahend: value to subtract from @minuend + * + * Returns: calculate @minuend - @subtrahend, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. For + * composition with the size_add() and size_mul() helpers, neither + * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). + * The lvalue must be size_t to avoid implicit type conversion. 
*/ -static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_add_overflow(bytes, c, &bytes)) + if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || + check_sub_overflow(minuend, subtrahend, &bytes)) return SIZE_MAX; return bytes; } /** - * struct_size() - Calculate size of structure with trailing array. - * @p: Pointer to the structure. - * @member: Name of the array member. - * @count: Number of elements in the array. + * array_size() - Calculate size of 2-dimensional array. * - * Calculates size of memory needed for structure @p followed by an - * array of @count number of @member elements. + * @a: dimension one + * @b: dimension two * - * Return: number of bytes needed or SIZE_MAX on overflow. + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. */ -#define struct_size(p, member, count) \ - __ab_c_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member),\ - sizeof(*(p))) +#define array_size(a, b) size_mul(a, b) + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +#define array3_size(a, b, c) size_mul(size_mul(a, b), c) /** * flex_array_size() - Calculate size of a flexible array member @@ -208,7 +221,22 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * Return: number of bytes needed or SIZE_MAX on overflow. */ #define flex_array_size(p, member, count) \ - array_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member)) + size_mul(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + +/** + * struct_size() - Calculate size of structure with trailing flexible array. + * + * @p: Pointer to the structure. + * @member: Name of the array member. + * @count: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @count number of @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. 
+ */ +#define struct_size(p, member, count) \ + size_add(sizeof(*(p)), flex_array_size(p, member, count)) #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Feb 2022 18:54:38 -0500 Subject: drm/amdkfd: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays CC: Changcheng Deng Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e4268f5e482..baec5a41de3e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args { __u32 op; __u32 nattr; /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[0]; + struct kfd_ioctl_svm_attribute attrs[]; }; /** -- cgit v1.2.3 From a65dbf7cded724a5ed4a5e1a718616b048ca0c34 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 23 Dec 2021 09:35:37 +0800 Subject: drm/amdgpu/gfx10: Add GC 10.3.7 Support Needed to properly initialize GC 10.3.7. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 76b580d10a52..55fa660e35f4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) } -- cgit v1.2.3 From 230f6fa2c1db6a3f3e668cfe95995ac8e6eee212 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Feb 2022 16:40:41 -0800 Subject: overflow: Provide constant expression struct_size There have been cases where struct_size() (or flex_array_size()) needs to be calculated for an initializer, which requires it be a constant expression. This is possible when the "count" argument is a constant expression, so provide this ability for the helpers. Cc: Gustavo A. R. Silva Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Rasmus Villemoes Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Tested-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20220210010407.GA701603@embeddedor --- include/linux/overflow.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 59d7228104d0..f1221d11f8e5 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -4,6 +4,7 @@ #include #include +#include /* * We need to compute the minimum and maximum values representable in a given @@ -221,8 +222,9 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Return: number of bytes needed or SIZE_MAX on overflow. 
*/ #define flex_array_size(p, member, count) \ - size_mul(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member)) + __builtin_choose_expr(__is_constexpr(count), \ + (count) * sizeof(*(p)->member) + __must_be_array((p)->member), \ + size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member))) /** * struct_size() - Calculate size of structure with trailing flexible array. @@ -237,6 +239,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Return: number of bytes needed or SIZE_MAX on overflow. */ #define struct_size(p, member, count) \ - size_add(sizeof(*(p)), flex_array_size(p, member, count)) + __builtin_choose_expr(__is_constexpr(count), \ + sizeof(*(p)) + flex_array_size(p, member, count), \ + size_add(sizeof(*(p)), flex_array_size(p, member, count))) #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From 28db4711bf48303814dcfd8d41a41106e90bc374 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:38 +0100 Subject: blk-mq: remove the request_queue argument to blk_insert_cloned_request The request must be submitted to the queue it was allocated for, so remove the extra request_queue argument. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d319ffa59354..3a41d50b85d3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -952,8 +952,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); void blk_rq_unprep_clone(struct request *rq); -blk_status_t blk_insert_cloned_request(struct request_queue *q, - struct request *rq); +blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; -- cgit v1.2.3 From 76792055c4c8b2472ca1ae48e0ddaf8497529f08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:10 +0100 Subject: block: add a ->free_disk method Add a method to notify the driver that the gendisk is about to be freed. This allows drivers to tie the lifetime of their private data to that of the gendisk and thus deal with device removal races without expensive synchronization and boilerplate code. A new flag is added so that ->free_disk is only called after a successful call to add_disk, which significantly simplifies the error handling path during probing. 
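A driver adopting the new callback would look roughly like the following hypothetical sketch (all names invented for illustration):

static void mydrv_free_disk(struct gendisk *disk)
{
	struct mydrv_device *dev = disk->private_data;

	kfree(dev);	/* no opener or in-flight I/O can still reference it here */
}

static const struct block_device_operations mydrv_fops = {
	.owner		= THIS_MODULE,
	.free_disk	= mydrv_free_disk,
};

Because the core only invokes ->free_disk for disks that made it through add_disk() (tracked with the new GD_ADDED bit), the probe error path can still free its private data directly without risking a double free.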
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3bfc75a2a450..f757f9c2871f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -146,6 +146,7 @@ struct gendisk { #define GD_READ_ONLY 1 #define GD_DEAD 2 #define GD_NATIVE_CAPACITY 3 +#define GD_ADDED 4 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -1464,6 +1465,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); + void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- cgit v1.2.3 From 0b0dff5b3b98c5c7ce848151df9da0b3cdf0cc8b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 15 Feb 2022 11:00:37 -0500 Subject: ipv6: per-netns exclusive flowlabel checks Ipv6 flowlabels historically require a reservation before use. Optionally in exclusive mode (e.g., user-private). Commit 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") introduced a fastpath that avoids this check when no exclusive leases exist in the system, and thus any flowlabel use will be granted. That allows skipping the control operation to reserve a flowlabel entirely. Though with a warning if the fast path fails: This is an optimization. Robust applications still have to revert to requesting leases if the fast path fails due to an exclusive lease. Still, this is subtle. Better isolate network namespaces from each other. Flowlabels are per-netns. Also record per-netns whether exclusive leases are in use. Then behavior does not change based on activity in other netns. Changes v2 - wrap in IS_ENABLED(CONFIG_IPV6) to avoid breakage if disabled Fixes: 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") Link: https://lore.kernel.org/netdev/MWHPR2201MB1072BCCCFCE779E4094837ACD0329@MWHPR2201MB1072.namprd22.prod.outlook.com/ Reported-by: Congyu Liu Signed-off-by: Willem de Bruijn Tested-by: Congyu Liu Link: https://lore.kernel.org/r/20220215160037.1976072-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 5 ++++- include/net/netns/ipv6.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3afcb128e064..92eec13d1693 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -393,17 +393,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) kfree_rcu(opt, rcu); } +#if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { - if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) + if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && + READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? 
: ERR_PTR(-ENOENT); return NULL; } +#endif struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index a4b550380316..6bd7e5a85ce7 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -77,9 +77,10 @@ struct netns_ipv6 { spinlock_t fib6_gc_lock; unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; + unsigned char flowlabel_has_excl; #ifdef CONFIG_IPV6_MULTIPLE_TABLES - unsigned int fib6_rules_require_fldissect; bool fib6_has_custom_rules; + unsigned int fib6_rules_require_fldissect; #ifdef CONFIG_IPV6_SUBTREES unsigned int fib6_routes_require_src; #endif -- cgit v1.2.3 From f8e9ce4a6e85067d7d7cfa89167f5ce5f0ec2a8a Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 15 Feb 2022 18:11:24 -0800 Subject: mptcp: mptcp_parse_option is no longer exported Options parsing in now done from mptcp_incoming_options(). mptcp_parse_option() has been removed from mptcp.h when CONFIG_MPTCP is defined but not when it is not. Fixes: cfde141ea3fa ("mptcp: move option parsing into mptcp_incoming_options()") Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index a925349b4b89..0a3b0fb04a3b 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -217,12 +217,6 @@ static inline bool rsk_drop_req(const struct request_sock *req) return false; } -static inline void mptcp_parse_option(const struct sk_buff *skb, - const unsigned char *ptr, int opsize, - struct tcp_options_received *opt_rx) -{ -} - static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts) -- cgit v1.2.3 From 72113d0a7d90d950c7c9a87ab905bffb6bc5752d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:24 +0900 Subject: signal.h: add linux/signal.h and asm/signal.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/signal.h and asm/signal.h are currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/asm/signal.h In file included from : ./usr/include/asm/signal.h:103:9: error: unknown type name ‘size_t’ 103 | size_t ss_size; | ^~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entries from user/include/Makefile. 
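The underlying rule for this and the following UAPI fixes is the same: an exported header must compile with nothing but the other exported headers on the include path. A minimal reproduction of the failure, outside the kernel tree, is simply:

	/* built with only the exported usr/include headers, no libc */
	#include <asm/signal.h>		/* before this patch: size_t undeclared */

__kernel_size_t, by contrast, is defined by the UAPI headers themselves (ultimately in asm-generic/posix_types.h), so after the substitution the header is self-contained and the HDRTEST target can include it in isolation. The shmbuf.h, binder.h, fsmap.h, kexec.h and reiserfs_xattr.h changes below apply the same idea to their libc-only types. (The description of the test harness here is a simplification, not a quote of usr/include/Makefile.)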
Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index f634822906e4..0eb69dc8e572 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -85,7 +85,7 @@ struct sigaction { typedef struct sigaltstack { void __user *ss_sp; int ss_flags; - size_t ss_size; + __kernel_size_t ss_size; } stack_t; #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 4a3233c1a69885aa7e71c48ff39ae11c212ac90a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:25 +0900 Subject: shmbuf.h: add asm/shmbuf.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asm/shmbuf.h is currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/asm/shmbuf.h In file included from ./usr/include/asm/shmbuf.h:6, from : ./usr/include/asm-generic/shmbuf.h:26:33: error: field ‘shm_perm’ has incomplete type 26 | struct ipc64_perm shm_perm; /* operation perms */ | ^~~~~~~~ ./usr/include/asm-generic/shmbuf.h:27:9: error: unknown type name ‘size_t’ 27 | size_t shm_segsz; /* size of segment (bytes) */ | ^~~~~~ ./usr/include/asm-generic/shmbuf.h:40:9: error: unknown type name ‘__kernel_pid_t’ 40 | __kernel_pid_t shm_cpid; /* pid of creator */ | ^~~~~~~~~~~~~~ ./usr/include/asm-generic/shmbuf.h:41:9: error: unknown type name ‘__kernel_pid_t’ 41 | __kernel_pid_t shm_lpid; /* pid of last operator */ | ^~~~~~~~~~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t and by including proper headers. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/shmbuf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/shmbuf.h b/include/uapi/asm-generic/shmbuf.h index 2bab955e0fed..2979b6dd2c56 100644 --- a/include/uapi/asm-generic/shmbuf.h +++ b/include/uapi/asm-generic/shmbuf.h @@ -3,6 +3,8 @@ #define __ASM_GENERIC_SHMBUF_H #include +#include +#include /* * The shmid64_ds structure for x86 architecture. 
@@ -24,7 +26,7 @@ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ + __kernel_size_t shm_segsz; /* size of segment (bytes) */ #if __BITS_PER_LONG == 64 long shm_atime; /* last attach time */ long shm_dtime; /* last detach time */ -- cgit v1.2.3 From 169adc2b6b3c5e86391921117b4ab3aaeb3c6ee1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:26 +0900 Subject: android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/android/binder.h and linux/android/binderfs.h are currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/linux/android/binder.h In file included from : ./usr/include/linux/android/binder.h:291:9: error: unknown type name ‘pid_t’ 291 | pid_t sender_pid; | ^~~~~ ./usr/include/linux/android/binder.h:292:9: error: unknown type name ‘uid_t’ 292 | uid_t sender_euid; | ^~~~~ The errors can be fixed by replacing {pid,uid}_t with __kernel_{pid,uid}_t. Then, remove the no-header-test entries from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/android/binder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 3246f2c74696..11157fae8a8e 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -288,8 +288,8 @@ struct binder_transaction_data { /* General information about the transaction. */ __u32 flags; - pid_t sender_pid; - uid_t sender_euid; + __kernel_pid_t sender_pid; + __kernel_uid_t sender_euid; binder_size_t data_size; /* number of bytes of data */ binder_size_t offsets_size; /* number of bytes of offsets */ -- cgit v1.2.3 From cbf2820341297b9aed0f846aba35556e94569210 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:27 +0900 Subject: fsmap.h: add linux/fsmap.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/fsmap.h is currently excluded from the UAPI compile-test because of the error like follows: HDRTEST usr/include/linux/fsmap.h In file included from : ./usr/include/linux/fsmap.h:72:19: error: unknown type name ‘size_t’ 72 | static __inline__ size_t | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/fsmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h index 91fd519a3f7d..c690d17f1d07 100644 --- a/include/uapi/linux/fsmap.h +++ b/include/uapi/linux/fsmap.h @@ -69,7 +69,7 @@ struct fsmap_head { }; /* Size of an fsmap_head with room for nr records. 
*/ -static inline size_t +static inline __kernel_size_t fsmap_sizeof( unsigned int nr) { -- cgit v1.2.3 From 8b4bca21c2c0cb3b5adb80985830a81d4e5d7081 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:28 +0900 Subject: kexec.h: add linux/kexec.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/kexec.h is currently excluded from the UAPI compile-test because of the errors like follows: HDRTEST usr/include/linux/kexec.h In file included from : ./usr/include/linux/kexec.h:56:9: error: unknown type name ‘size_t’ 56 | size_t bufsz; | ^~~~~~ ./usr/include/linux/kexec.h:58:9: error: unknown type name ‘size_t’ 58 | size_t memsz; | ^~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/kexec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 778dc191c265..fb7e2ef60825 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -54,9 +54,9 @@ */ struct kexec_segment { const void *buf; - size_t bufsz; + __kernel_size_t bufsz; const void *mem; - size_t memsz; + __kernel_size_t memsz; }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 2a5c0fdc70cd653741e910e92ffeb2fa7376db07 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:29 +0900 Subject: reiserfs_xattr.h: add linux/reiserfs_xattr.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/reiserfs_xattr.h is currently excluded from the UAPI compile-test because of the error like follows: HDRTEST usr/include/linux/reiserfs_xattr.h In file included from : ./usr/include/linux/reiserfs_xattr.h:22:9: error: unknown type name ‘size_t’ 22 | size_t length; | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from user/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/reiserfs_xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h index 28f10842f047..503ad018ce5b 100644 --- a/include/uapi/linux/reiserfs_xattr.h +++ b/include/uapi/linux/reiserfs_xattr.h @@ -19,7 +19,7 @@ struct reiserfs_xattr_header { struct reiserfs_security_handle { const char *name; void *value; - size_t length; + __kernel_size_t length; }; #endif /* _LINUX_REISERFS_XATTR_H */ -- cgit v1.2.3 From 6a7bdd89f50d399dd02847e6f398d408b086df50 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Jan 2022 17:24:11 +0100 Subject: media: v4l2-mediabus: Use structures to describe bus configuration The media bus configuration is specified through a set of flags, some of which being mutually exclusive. This doesn't scale to express more complex configurations. Improve the API by replacing the single flags field in v4l2_mbus_config by a union of v4l2_mbus_config_* structures. The flags themselves are still used in those structures, so they are kept here. Drivers are however updated to use structure fields instead of flags when already possible. 
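As an illustrative sketch (not one of the drivers converted in this series), a sensor driver's .get_mbus_config handler now describes, say, a two-lane CSI-2 D-PHY bus through the typed union rather than through loose flag bits:

static int example_get_mbus_config(struct v4l2_subdev *sd, unsigned int pad,
				   struct v4l2_mbus_config *cfg)
{
	cfg->type = V4L2_MBUS_CSI2_DPHY;
	cfg->bus.mipi_csi2.num_data_lanes = 2;
	cfg->bus.mipi_csi2.flags = 0;	/* serial flags, if any, stay in the typed struct */

	return 0;
}

The receiver side then reads only the union member matching cfg->type, so settings that make sense for one bus type can no longer be combined with flags belonging to another.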
Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 9c4970fbd8ea..269aaf57ba32 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -166,12 +166,26 @@ enum v4l2_mbus_type { /** * struct v4l2_mbus_config - media bus configuration - * @type: in: interface type - * @flags: in / out: configuration flags, depending on @type + * @type: interface type + * @bus: bus configuration data structure + * @bus.parallel: embedded &struct v4l2_mbus_config_parallel. + * Used if the bus is parallel or BT.656. + * @bus.mipi_csi1: embedded &struct v4l2_mbus_config_mipi_csi1. + * Used if the bus is MIPI Alliance's Camera Serial + * Interface version 1 (MIPI CSI1) or Standard + * Mobile Imaging Architecture's Compact Camera Port 2 + * (SMIA CCP2). + * @bus.mipi_csi2: embedded &struct v4l2_mbus_config_mipi_csi2. + * Used if the bus is MIPI Alliance's Camera Serial + * Interface version 2 (MIPI CSI2). */ struct v4l2_mbus_config { enum v4l2_mbus_type type; - unsigned int flags; + union { + struct v4l2_mbus_config_parallel parallel; + struct v4l2_mbus_config_mipi_csi1 mipi_csi1; + struct v4l2_mbus_config_mipi_csi2 mipi_csi2; + } bus; }; /** -- cgit v1.2.3 From 20c238dfb26c23dc493e5fcfeea682af8b33375a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Jan 2022 17:24:12 +0100 Subject: media: v4l2-mediabus: Drop legacy V4L2_MBUS_CSI2_*_LANE flags The V4L2_MBUS_CSI2_*_LANE flags are a legacy API and are unused. Drop them. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 269aaf57ba32..44195ceeccca 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -21,8 +21,6 @@ * clear both the V4L2_MBUS_HSYNC_ACTIVE_HIGH and the * V4L2_MBUS_HSYNC_ACTIVE_LOW flag at the same time. Instead either flag * V4L2_MBUS_HSYNC_ACTIVE_HIGH or flag V4L2_MBUS_HSYNC_ACTIVE_LOW shall be set. - * The same is true for the V4L2_MBUS_CSI2_1/2/3/4_LANE flags group: only one - * of these four bits shall be set. * * TODO: replace the existing V4L2_MBUS_* flags with structures of fields * to avoid conflicting settings. @@ -69,11 +67,6 @@ #define V4L2_MBUS_DATA_ENABLE_LOW BIT(15) /* Serial flags */ -/* CSI-2 D-PHY number of data lanes. */ -#define V4L2_MBUS_CSI2_1_LANE BIT(0) -#define V4L2_MBUS_CSI2_2_LANE BIT(1) -#define V4L2_MBUS_CSI2_3_LANE BIT(2) -#define V4L2_MBUS_CSI2_4_LANE BIT(3) /* CSI-2 Virtual Channel identifiers. 
*/ #define V4L2_MBUS_CSI2_CHANNEL_0 BIT(4) #define V4L2_MBUS_CSI2_CHANNEL_1 BIT(5) @@ -83,10 +76,6 @@ #define V4L2_MBUS_CSI2_CONTINUOUS_CLOCK BIT(8) #define V4L2_MBUS_CSI2_NONCONTINUOUS_CLOCK BIT(9) -#define V4L2_MBUS_CSI2_LANES (V4L2_MBUS_CSI2_1_LANE | \ - V4L2_MBUS_CSI2_2_LANE | \ - V4L2_MBUS_CSI2_3_LANE | \ - V4L2_MBUS_CSI2_4_LANE) #define V4L2_MBUS_CSI2_CHANNELS (V4L2_MBUS_CSI2_CHANNEL_0 | \ V4L2_MBUS_CSI2_CHANNEL_1 | \ V4L2_MBUS_CSI2_CHANNEL_2 | \ -- cgit v1.2.3 From 5a6ac3f4b46fd86d13e1d8ab4a55e4d89f3ab400 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 3 Jan 2022 17:24:13 +0100 Subject: media: v4l2-mediabus: Drop legacy V4L2_MBUS_CSI2_CHANNEL_* flags The V4L2_MBUS_CSI2_CHANNEL_* flags are a legacy API. Only V4L2_MBUS_CSI2_CHANNEL_0 is used, set in a single driver, and never read. Drop those flags. Virtual channel information should be conveyed through frame descriptors instead. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 44195ceeccca..c6626a22b394 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -67,20 +67,10 @@ #define V4L2_MBUS_DATA_ENABLE_LOW BIT(15) /* Serial flags */ -/* CSI-2 Virtual Channel identifiers. */ -#define V4L2_MBUS_CSI2_CHANNEL_0 BIT(4) -#define V4L2_MBUS_CSI2_CHANNEL_1 BIT(5) -#define V4L2_MBUS_CSI2_CHANNEL_2 BIT(6) -#define V4L2_MBUS_CSI2_CHANNEL_3 BIT(7) /* Clock non-continuous mode support. */ #define V4L2_MBUS_CSI2_CONTINUOUS_CLOCK BIT(8) #define V4L2_MBUS_CSI2_NONCONTINUOUS_CLOCK BIT(9) -#define V4L2_MBUS_CSI2_CHANNELS (V4L2_MBUS_CSI2_CHANNEL_0 | \ - V4L2_MBUS_CSI2_CHANNEL_1 | \ - V4L2_MBUS_CSI2_CHANNEL_2 | \ - V4L2_MBUS_CSI2_CHANNEL_3) - #define V4L2_MBUS_CSI2_MAX_DATA_LANES 8 /** -- cgit v1.2.3 From b9f7caa7753ad185e0dc7afb3ae4bd423d11f5c0 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 5 Jan 2022 21:15:58 +0100 Subject: media: v4l2-mediabus: Drop V4L2_MBUS_CSI2_CONTINUOUS_CLOCK flag MIPI CSI-2 continuous and non-continuous clock modes are mutually exclusive. Drop the V4L2_MBUS_CSI2_CONTINUOUS_CLOCK flag and use V4L2_MBUS_CSI2_NONCONTINUOUS_CLOCK only. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index c6626a22b394..e0db3bcff9ed 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -68,8 +68,7 @@ /* Serial flags */ /* Clock non-continuous mode support. */ -#define V4L2_MBUS_CSI2_CONTINUOUS_CLOCK BIT(8) -#define V4L2_MBUS_CSI2_NONCONTINUOUS_CLOCK BIT(9) +#define V4L2_MBUS_CSI2_NONCONTINUOUS_CLOCK BIT(0) #define V4L2_MBUS_CSI2_MAX_DATA_LANES 8 -- cgit v1.2.3 From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 18:29:52 +0100 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). 
This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Cc: Arnd Bergmann Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. @@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From 5cadbd897221431ea7f2c50510ed63042de9285f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 23 Jan 2022 16:36:19 +0100 Subject: media: Define MIPI CSI-2 data types in a shared header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many CSI-2-related drivers in the media subsystem that come with their own macros to handle the CSI-2 data types (or just hardcode the numerical values). Provide a shared header with definitions for those data types that driver can use. 
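As a rough illustration of the intended use (hypothetical driver code, not taken from any in-tree CSI-2 receiver), a lookup helper can now return the shared macros instead of magic numbers:

#include <media/mipi-csi2.h>

/* Illustrative only: map a media bus code to its CSI-2 data type. */
static u8 example_mbus_to_csi2_dt(u32 mbus_code)
{
	switch (mbus_code) {
	case MEDIA_BUS_FMT_SRGGB10_1X10:
		return MIPI_CSI2_DT_RAW10;
	case MEDIA_BUS_FMT_UYVY8_1X16:
		return MIPI_CSI2_DT_YUV422_8B;
	default:
		return MIPI_CSI2_DT_RAW8;
	}
}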
Signed-off-by: Laurent Pinchart Reviewed-by: Niklas Söderlund Reviewed-by: Kieran Bingham Signed-off-by: Mauro Carvalho Chehab --- include/media/mipi-csi2.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 include/media/mipi-csi2.h (limited to 'include') diff --git a/include/media/mipi-csi2.h b/include/media/mipi-csi2.h new file mode 100644 index 000000000000..392794e5badd --- /dev/null +++ b/include/media/mipi-csi2.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * MIPI CSI-2 Data Types + * + * Copyright (C) 2022 Laurent Pinchart + */ + +#ifndef _MEDIA_MIPI_CSI2_H +#define _MEDIA_MIPI_CSI2_H + +/* Short packet data types */ +#define MIPI_CSI2_DT_FS 0x00 +#define MIPI_CSI2_DT_FE 0x01 +#define MIPI_CSI2_DT_LS 0x02 +#define MIPI_CSI2_DT_LE 0x03 +#define MIPI_CSI2_DT_GENERIC_SHORT(n) (0x08 + (n)) /* 0..7 */ + +/* Long packet data types */ +#define MIPI_CSI2_DT_NULL 0x10 +#define MIPI_CSI2_DT_BLANKING 0x11 +#define MIPI_CSI2_DT_EMBEDDED_8B 0x12 +#define MIPI_CSI2_DT_YUV420_8B 0x18 +#define MIPI_CSI2_DT_YUV420_10B 0x19 +#define MIPI_CSI2_DT_YUV420_8B_LEGACY 0x1a +#define MIPI_CSI2_DT_YUV420_8B_CS 0x1c +#define MIPI_CSI2_DT_YUV420_10B_CS 0x1d +#define MIPI_CSI2_DT_YUV422_8B 0x1e +#define MIPI_CSI2_DT_YUV422_10B 0x1f +#define MIPI_CSI2_DT_RGB444 0x20 +#define MIPI_CSI2_DT_RGB555 0x21 +#define MIPI_CSI2_DT_RGB565 0x22 +#define MIPI_CSI2_DT_RGB666 0x23 +#define MIPI_CSI2_DT_RGB888 0x24 +#define MIPI_CSI2_DT_RAW24 0x27 +#define MIPI_CSI2_DT_RAW6 0x28 +#define MIPI_CSI2_DT_RAW7 0x29 +#define MIPI_CSI2_DT_RAW8 0x2a +#define MIPI_CSI2_DT_RAW10 0x2b +#define MIPI_CSI2_DT_RAW12 0x2c +#define MIPI_CSI2_DT_RAW14 0x2d +#define MIPI_CSI2_DT_RAW16 0x2e +#define MIPI_CSI2_DT_RAW20 0x2f +#define MIPI_CSI2_DT_USER_DEFINED(n) (0x30 + (n)) /* 0..7 */ + +#endif /* _MEDIA_MIPI_CSI2_H */ -- cgit v1.2.3 From 5224f79096170bf7b92cc8fe42a12f44b91e5f62 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Feb 2022 19:11:44 -0600 Subject: treewide: Replace zero-length arrays with flexible-array members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (next-20220214$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... T1 member; T2 array[ - 0 ]; }; UAPI and wireless changes were intentionally excluded from this patch and will be sent out separately. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. 
Silva --- include/asm-generic/tlb.h | 4 ++-- include/linux/greybus/greybus_manifest.h | 4 ++-- include/linux/greybus/hd.h | 2 +- include/linux/greybus/module.h | 2 +- include/linux/i3c/ccc.h | 6 +++--- include/linux/platform_data/brcmfmac.h | 2 +- include/linux/platform_data/cros_ec_commands.h | 2 +- include/net/bluetooth/mgmt.h | 2 +- include/net/ioam6.h | 2 +- include/sound/sof/channel_map.h | 4 ++-- 10 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2c68a545ffa7..fd7feb5c7894 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -180,7 +180,7 @@ struct mmu_table_batch { struct rcu_head rcu; #endif unsigned int nr; - void *tables[0]; + void *tables[]; }; #define MAX_TABLE_BATCH \ @@ -227,7 +227,7 @@ struct mmu_gather_batch { struct mmu_gather_batch *next; unsigned int nr; unsigned int max; - struct page *pages[0]; + struct page *pages[]; }; #define MAX_GATHER_BATCH \ diff --git a/include/linux/greybus/greybus_manifest.h b/include/linux/greybus/greybus_manifest.h index 6e62fe478712..bef9eb2093e9 100644 --- a/include/linux/greybus/greybus_manifest.h +++ b/include/linux/greybus/greybus_manifest.h @@ -100,7 +100,7 @@ enum { struct greybus_descriptor_string { __u8 length; __u8 id; - __u8 string[0]; + __u8 string[]; } __packed; /* @@ -175,7 +175,7 @@ struct greybus_manifest_header { struct greybus_manifest { struct greybus_manifest_header header; - struct greybus_descriptor descriptors[0]; + struct greybus_descriptor descriptors[]; } __packed; #endif /* __GREYBUS_MANIFEST_H */ diff --git a/include/linux/greybus/hd.h b/include/linux/greybus/hd.h index d3faf0c1a569..718e2857054e 100644 --- a/include/linux/greybus/hd.h +++ b/include/linux/greybus/hd.h @@ -58,7 +58,7 @@ struct gb_host_device { struct gb_svc *svc; /* Private data for the host driver */ - unsigned long hd_priv[0] __aligned(sizeof(s64)); + unsigned long hd_priv[] __aligned(sizeof(s64)); }; #define to_gb_host_device(d) container_of(d, struct gb_host_device, dev) diff --git a/include/linux/greybus/module.h b/include/linux/greybus/module.h index 47b839af145d..3efe2133acfd 100644 --- a/include/linux/greybus/module.h +++ b/include/linux/greybus/module.h @@ -23,7 +23,7 @@ struct gb_module { bool disconnected; - struct gb_interface *interfaces[0]; + struct gb_interface *interfaces[]; }; #define to_gb_module(d) container_of(d, struct gb_module, dev) diff --git a/include/linux/i3c/ccc.h b/include/linux/i3c/ccc.h index 73b0982cc519..ad59a4ae60d1 100644 --- a/include/linux/i3c/ccc.h +++ b/include/linux/i3c/ccc.h @@ -132,7 +132,7 @@ struct i3c_ccc_dev_desc { struct i3c_ccc_defslvs { u8 count; struct i3c_ccc_dev_desc master; - struct i3c_ccc_dev_desc slaves[0]; + struct i3c_ccc_dev_desc slaves[]; } __packed; /** @@ -240,7 +240,7 @@ struct i3c_ccc_bridged_slave_desc { */ struct i3c_ccc_setbrgtgt { u8 count; - struct i3c_ccc_bridged_slave_desc bslaves[0]; + struct i3c_ccc_bridged_slave_desc bslaves[]; } __packed; /** @@ -318,7 +318,7 @@ enum i3c_ccc_setxtime_subcmd { */ struct i3c_ccc_setxtime { u8 subcmd; - u8 data[0]; + u8 data[]; } __packed; #define I3C_CCC_GETXTIME_SYNC_MODE BIT(0) diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index 2b5676ff35be..f922a192fe58 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -178,7 +178,7 @@ struct brcmfmac_platform_data { void (*power_off)(void); char *fw_alternative_path; int device_count; - struct 
brcmfmac_pd_device devices[0]; + struct brcmfmac_pd_device devices[]; }; diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 271bd87bff0a..728735aed980 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5644,7 +5644,7 @@ struct ec_response_typec_discovery { uint8_t svid_count; /* Number of SVIDs partner sent */ uint16_t reserved; uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ - struct svid_mode_info svids[0]; + struct svid_mode_info svids[]; } __ec_align1; /* USB Type-C commands for AP-controlled device policy. */ diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 107b25deae68..9607ec289fd0 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -696,7 +696,7 @@ struct mgmt_cp_set_blocked_keys { #define MGMT_READ_CONTROLLER_CAP_SIZE 0 struct mgmt_rp_read_controller_cap { __le16 cap_len; - __u8 cap[0]; + __u8 cap[]; } __packed; #define MGMT_OP_READ_EXP_FEATURES_INFO 0x0049 diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 3f45ba37a2c6..781d2d8b2f29 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -35,7 +35,7 @@ struct ioam6_schema { int len; __be32 hdr; - u8 data[0]; + u8 data[]; }; struct ioam6_pernet_data { diff --git a/include/sound/sof/channel_map.h b/include/sound/sof/channel_map.h index fd3a30fcf756..d363f0ca6979 100644 --- a/include/sound/sof/channel_map.h +++ b/include/sound/sof/channel_map.h @@ -39,7 +39,7 @@ struct sof_ipc_channel_map { uint32_t ext_id; uint32_t ch_mask; uint32_t reserved; - int32_t ch_coeffs[0]; + int32_t ch_coeffs[]; } __packed; /** @@ -55,7 +55,7 @@ struct sof_ipc_stream_map { struct sof_ipc_cmd_hdr hdr; uint32_t num_ch_map; uint32_t reserved[3]; - struct sof_ipc_channel_map ch_map[0]; + struct sof_ipc_channel_map ch_map[]; } __packed; #endif /* __IPC_CHANNEL_MAP_H__ */ -- cgit v1.2.3 From c518afec288351347dbe05ea3d49d18fb9a9fff1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:05 +0200 Subject: net: mscc: ocelot: consolidate cookie allocation for private VCAP rules Every use case that needed VCAP filters (in order: DSA tag_8021q, MRP, PTP traps) has hardcoded filter identifiers that worked well enough for that use case alone. But when two or more of those use cases would be used together, some of those identifiers would overlap, leading to breakage. Add definitions for each cookie and centralize them in ocelot_vcap.h, such that the overlaps are more obvious. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_vcap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 709cbc198fd2..562bcd972132 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -8,6 +8,22 @@ #include +/* Cookie definitions for private VCAP filters installed by the driver. + * Must be unique per VCAP block. 
+ */ +#define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports) +#define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports) +#define OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 1) +#define OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 2) +#define OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 3) +#define OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 4) +#define OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 5) +#define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) (port) +#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot, port) ((ocelot)->num_phys_ports + (port)) + /* ================================================================= * VCAP Common * ================================================================= -- cgit v1.2.3 From 36fac35b29072e345d5fc485cf7841be265181b1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:06 +0200 Subject: net: mscc: ocelot: delete OCELOT_MRP_CPUQ MRP frames are configured to be trapped to the CPU queue 7, and this number is reflected in the extraction header. However, the information isn't used anywhere, so just leave MRP frames to go to CPU queue 0 unless needed otherwise. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cacb103e4bad..2d7456c0e77d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -105,8 +105,6 @@ #define REG_RESERVED_ADDR 0xffffffff #define REG_RESERVED(reg) REG(reg, REG_RESERVED_ADDR) -#define OCELOT_MRP_CPUQ 7 - enum ocelot_target { ANA = 1, QS, -- cgit v1.2.3 From b9bace6e534d431871a9d69cbd06d3a798f5086d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:07 +0200 Subject: net: mscc: ocelot: use a single VCAP filter for all MRP traps The MRP assist code installs a VCAP IS2 trapping rule for each port, but since the key and the action is the same, just the ingress port mask differs, there isn't any need to do this. We can save some space in the TCAM by using a single filter and adjusting the ingress port mask. Reuse the ocelot_trap_add() and ocelot_trap_del() functions for this purpose. Now that the cookies are no longer per port, we need to change the allocation scheme such that MRP traps use a fixed number. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot_vcap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 562bcd972132..14ada097db0b 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -21,8 +21,8 @@ #define OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 3) #define OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 4) #define OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 5) +#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports + 6) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) (port) -#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot, port) ((ocelot)->num_phys_ports + (port)) /* ================================================================= * VCAP Common -- cgit v1.2.3 From 85ea0daabe5abc6add440f86565b7b9f6aff5535 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:08 +0200 Subject: net: mscc: ocelot: avoid overlap in VCAP IS2 between PTP and MRP traps OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN overlaps with OCELOT_VCAP_IS2_MRP_REDIRECT. To avoid this, make OCELOT_VCAP_IS2_MRP_REDIRECT take the cookie region from N to 2 * N - 1 (where N is ocelot->num_phys_ports). To avoid any risk that the singleton (not per port) VCAP IS2 filters overlap with per-port VCAP IS2 filters, we must ensure that the number of singleton filters is smaller than the number of physical ports. This is true right now, but may change in the future as switches with less ports get supported, or more singleton filters get added. So to be future-proof, let's move the singleton filters at the end of the range, where they won't overlap with anything to their right. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot_vcap.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 14ada097db0b..ae0eec7f5dd2 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -15,14 +15,14 @@ #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS1_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) -#define OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports) -#define OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 1) -#define OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 2) -#define OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 3) -#define OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 4) -#define OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports + 5) -#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports + 6) -#define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) (port) +#define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) +#define OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports * 2) +#define OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 1) +#define OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 2) +#define OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 3) +#define OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 4) +#define OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 5) +#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 6) /* ================================================================= * VCAP Common -- cgit v1.2.3 From e42bd4ed09aaf57949f199ae02d20a7108ccf73b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:10 +0200 Subject: net: mscc: ocelot: keep traps in a list When using the ocelot-8021q tagging protocol, the CPU port isn't configured as an NPI port, but is a regular port. So a "trap to CPU" operation is actually a "redirect" operation. So DSA needs to set up the trapping action one way or another, depending on the tagging protocol in use. To ease DSA's work of modifying the action, keep all currently installed traps in a list, so that DSA can live-patch them when the tagging protocol changes. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot.h | 1 + include/soc/mscc/ocelot_vcap.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 2d7456c0e77d..78f56502bc09 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -689,6 +689,7 @@ struct ocelot { u8 base_mac[ETH_ALEN]; struct list_head vlans; + struct list_head traps; /* Switches like VSC9959 have flooding per traffic class */ int num_flooding_pgids; diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index ae0eec7f5dd2..69b3d880302d 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -682,6 +682,7 @@ struct ocelot_vcap_id { struct ocelot_vcap_filter { struct list_head list; + struct list_head trap_list; enum ocelot_vcap_filter_type type; int block_id; -- cgit v1.2.3 From 9d75b8818537fe64e6eae033765b9dc1b3107c15 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:11 +0200 Subject: net: mscc: ocelot: annotate which traps need PTP timestamping The ocelot switch library does not need this information, but the felix DSA driver does. As a reminder, the VSC9959 switch in LS1028A doesn't have an IRQ line for packet extraction, so to be notified that a PTP packet needs to be dequeued, it receives that packet also over Ethernet, by setting up a packet trap. The Felix driver needs to install special kinds of traps for packets in need of RX timestamps, such that the packets are replicated both over Ethernet and over the CPU port module. But the Ocelot switch library sets up more than one trap for PTP event messages; it also traps PTP general messages, MRP control messages etc. Those packets don't need PTP timestamps, so there's no reason for the Felix driver to send them to the CPU port module. By knowing which traps need PTP timestamps, the Felix driver can adjust the traps installed using ocelot_trap_add() such that only those will actually get delivered to the CPU port module. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_vcap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 69b3d880302d..50af64e2ca3c 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -695,6 +695,7 @@ struct ocelot_vcap_filter { struct ocelot_vcap_action action; struct ocelot_vcap_stats stats; /* For VCAP IS1 and IS2 */ + bool take_ts; unsigned long ingress_port_mask; /* For VCAP ES0 */ struct ocelot_vcap_port ingress_port; -- cgit v1.2.3 From 9934800436552d2a4af4aaca62d779b33d1f6a63 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 16:30:13 +0200 Subject: net: dsa: felix: update destinations of existing traps with ocelot-8021q Historically, the felix DSA driver has installed special traps such that PTP over L2 works with the ocelot-8021q tagging protocol; commit 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") has the details. Then the ocelot switch library also gained more comprehensive support for PTP traps through commit 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets"). Right now, PTP over L2 works using ocelot-8021q via the traps it has set for itself, but nothing else does. Consolidating the two code blocks would make ocelot-8021q gain support for PTP over L4 and tc-flower traps, and at the same time avoid some code and TCAM duplication. 
The traps are similar in intent, but different in execution, so some explanation is required. The traps set up by felix_setup_mmio_filtering() are VCAP IS1 filters, which have a PAG that chains them to a VCAP IS2 filter, and the IS2 is where the 'trap' action resides. The traps set up by ocelot_trap_add(), on the other hand, have a single filter, in VCAP IS2. The reason for chaining VCAP IS1 and IS2 in Felix was to ensure that the hardcoded traps take precedence and cannot be overridden by the Ocelot switch library. So in principle, the PTP traps needed for ocelot-8021q in the Felix driver can rely on ocelot_trap_add(), but the filters need to be patched to account for a quirk that LS1028A has: the quirk_no_xtr_irq described in commit 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping"). Live-patching is done by iterating through the trap list every time we know it has been updated, and transforming a trap into a redirect + CPU copy if ocelot-8021q is in use. Making the DSA ocelot-8021q tagger work with the Ocelot traps means we can eliminate the dedicated OCELOT_VCAP_IS1_TAG_8021Q_PTP_MMIO and OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO cookies. To minimize the patch delta, OCELOT_VCAP_IS2_MRP_TRAP takes the place of OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO (the alternative would have been to left-shift all cookie numbers by 1). Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_vcap.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 50af64e2ca3c..deb2ad9eb0a5 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,16 +13,14 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) -#define OCELOT_VCAP_IS1_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) -#define OCELOT_VCAP_IS2_TAG_8021Q_PTP_MMIO(ocelot) ((ocelot)->num_phys_ports * 2) +#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) #define OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 1) #define OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 2) #define OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 3) #define OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 4) #define OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 5) -#define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2 + 6) /* ================================================================= * VCAP Common -- cgit v1.2.3 From 40efe139ff605d80cc829ddf8c50c71d20399bf8 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 27 Jan 2022 12:57:32 +0530 Subject: dt-bindings: Add Tegra234 APE support Add clocks, power-domain and memory bindings to support APE subsystem on Tegra234. 
Signed-off-by: Sameer Pujar Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 74 +++++++++++++++++++++++++- include/dt-bindings/memory/tegra234-mc.h | 7 +++ include/dt-bindings/power/tegra234-powergate.h | 9 ++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 include/dt-bindings/power/tegra234-powergate.h (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 2529e7ec0bf4..5c201a8278ff 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. */ +/* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. */ #ifndef DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H #define DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H @@ -9,6 +9,26 @@ * @defgroup bpmp_clock_ids Clock ID's * @{ */ +/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_AHUB */ +#define TEGRA234_CLK_AHUB 4U +/** @brief output of gate CLK_ENB_APB2APE */ +#define TEGRA234_CLK_APB2APE 5U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_APE */ +#define TEGRA234_CLK_APE 6U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_AUD_MCLK */ +#define TEGRA234_CLK_AUD_MCLK 7U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DMIC1 */ +#define TEGRA234_CLK_DMIC1 15U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DMIC2 */ +#define TEGRA234_CLK_DMIC2 16U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DMIC3 */ +#define TEGRA234_CLK_DMIC3 17U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DMIC4 */ +#define TEGRA234_CLK_DMIC4 18U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DSPK1 */ +#define TEGRA234_CLK_DSPK1 29U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_DSPK2 */ +#define TEGRA234_CLK_DSPK2 30U /** * @brief controls the EMC clock frequency. 
* @details Doing a clk_set_rate on this clock will select the @@ -36,8 +56,36 @@ #define TEGRA234_CLK_I2C8 54U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2C9 */ #define TEGRA234_CLK_I2C9 55U +/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S1 */ +#define TEGRA234_CLK_I2S1 56U +/** @brief clock recovered from I2S1 input */ +#define TEGRA234_CLK_I2S1_SYNC_INPUT 57U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S2 */ +#define TEGRA234_CLK_I2S2 58U +/** @brief clock recovered from I2S2 input */ +#define TEGRA234_CLK_I2S2_SYNC_INPUT 59U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S3 */ +#define TEGRA234_CLK_I2S3 60U +/** @brief clock recovered from I2S3 input */ +#define TEGRA234_CLK_I2S3_SYNC_INPUT 61U +/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S4 */ +#define TEGRA234_CLK_I2S4 62U +/** @brief clock recovered from I2S4 input */ +#define TEGRA234_CLK_I2S4_SYNC_INPUT 63U +/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S5 */ +#define TEGRA234_CLK_I2S5 64U +/** @brief clock recovered from I2S5 input */ +#define TEGRA234_CLK_I2S5_SYNC_INPUT 65U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_I2S6 */ +#define TEGRA234_CLK_I2S6 66U +/** @brief clock recovered from I2S6 input */ +#define TEGRA234_CLK_I2S6_SYNC_INPUT 67U +/** PLL controlled by CLK_RST_CONTROLLER_PLLA_BASE for use by audio clocks */ +#define TEGRA234_CLK_PLLA 93U /** @brief PLLP clk output */ #define TEGRA234_CLK_PLLP_OUT0 102U +/** @brief output of the divider CLK_RST_CONTROLLER_PLLA_OUT */ +#define TEGRA234_CLK_PLLA_OUT0 104U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM1 */ #define TEGRA234_CLK_PWM1 105U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PWM2 */ @@ -56,6 +104,30 @@ #define TEGRA234_CLK_PWM8 112U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ #define TEGRA234_CLK_SDMMC4 123U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DMIC1 */ +#define TEGRA234_CLK_SYNC_DMIC1 139U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DMIC2 */ +#define TEGRA234_CLK_SYNC_DMIC2 140U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DMIC3 */ +#define TEGRA234_CLK_SYNC_DMIC3 141U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DMIC4 */ +#define TEGRA234_CLK_SYNC_DMIC4 142U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DSPK1 */ +#define TEGRA234_CLK_SYNC_DSPK1 143U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_DSPK2 */ +#define TEGRA234_CLK_SYNC_DSPK2 144U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S1 */ +#define TEGRA234_CLK_SYNC_I2S1 145U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S2 */ +#define TEGRA234_CLK_SYNC_I2S2 146U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S3 */ +#define TEGRA234_CLK_SYNC_I2S3 147U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S4 */ +#define TEGRA234_CLK_SYNC_I2S4 148U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S5 */ +#define TEGRA234_CLK_SYNC_I2S5 149U +/** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S6 */ +#define TEGRA234_CLK_SYNC_I2S6 150U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ #define 
TEGRA234_CLK_UARTA 155U /** @brief CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC_LEGACY_TM switch divider output */ diff --git a/include/dt-bindings/memory/tegra234-mc.h b/include/dt-bindings/memory/tegra234-mc.h index 2662f70c15c6..42661dda1c53 100644 --- a/include/dt-bindings/memory/tegra234-mc.h +++ b/include/dt-bindings/memory/tegra234-mc.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. */ #ifndef DT_BINDINGS_MEMORY_TEGRA234_MC_H #define DT_BINDINGS_MEMORY_TEGRA234_MC_H @@ -7,6 +8,8 @@ #define TEGRA234_SID_INVALID 0x00 #define TEGRA234_SID_PASSTHROUGH 0x7f +/* NISO0 stream IDs */ +#define TEGRA234_SID_APE 0x02 /* NISO1 stream IDs */ #define TEGRA234_SID_SDMMC4 0x02 @@ -28,5 +31,9 @@ #define TEGRA234_MEMORY_CLIENT_BPMPDMAR 0x95 /* BPMPDMA write client */ #define TEGRA234_MEMORY_CLIENT_BPMPDMAW 0x96 +/* APEDMA read client */ +#define TEGRA234_MEMORY_CLIENT_APEDMAR 0x9f +/* APEDMA write client */ +#define TEGRA234_MEMORY_CLIENT_APEDMAW 0xa0 #endif diff --git a/include/dt-bindings/power/tegra234-powergate.h b/include/dt-bindings/power/tegra234-powergate.h new file mode 100644 index 000000000000..8e28fcbdc038 --- /dev/null +++ b/include/dt-bindings/power/tegra234-powergate.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. */ + +#ifndef __ABI_MACH_T234_POWERGATE_T234_H_ +#define __ABI_MACH_T234_POWERGATE_T234_H_ + +#define TEGRA234_POWER_DOMAIN_AUD 2U + +#endif -- cgit v1.2.3 From 7a5428dcb7902700b830e912feee4e845df7c019 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Feb 2022 08:52:31 +0100 Subject: block: fix surprise removal for drivers calling blk_set_queue_dying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various block drivers call blk_set_queue_dying to mark a disk as dead due to surprise removal events, but since commit 8e141f9eb803 that doesn't work given that the GD_DEAD flag needs to be set to stop I/O. Replace the driver calls to blk_set_queue_dying with a new (and properly documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the only remaining caller. Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Reported-by: Markus Blöchl Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20220217075231.1140-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..16b47035e4b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); -extern void blk_set_queue_dying(struct request_queue *); + +void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From 2e7dfb0e9cacad0f1adbc4b97f0b96ba35027f24 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Feb 2022 23:01:16 -0600 Subject: usb: typec: Factor out non-PD fwnode properties Basic programmable non-PD Type-C port controllers do not need the full TCPM library, but they share the same devicetree binding and the same typec_capability structure. 
Factor out a helper for parsing those properties which map to fields in struct typec_capability, so the code can be shared between TCPM and basic non-TCPM drivers. Reviewed-by: Heikki Krogerus Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20220214050118.61015-4-samuel@sholland.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 7ba45a97eeae..fdf737d48b3b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -295,6 +295,9 @@ int typec_set_mode(struct typec_port *port, int mode); void *typec_get_drvdata(struct typec_port *port); +int typec_get_fw_cap(struct typec_capability *cap, + struct fwnode_handle *fwnode); + int typec_find_pwr_opmode(const char *name); int typec_find_orientation(const char *name); int typec_find_port_power_role(const char *name); -- cgit v1.2.3 From ebcbc6ea7d8a604ad8504dae70a6ac1b1e64a0b7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 14 Feb 2022 18:20:24 -0800 Subject: mm/munlock: delete page_mlock() and all its works We have recommended some applications to mlock their userspace, but that turns out to be counter-productive: when many processes mlock the same file, contention on rmap's i_mmap_rwsem can become intolerable at exit: it is needed for write, to remove any vma mapping that file from rmap's tree; but hogged for read by those with mlocks calling page_mlock() (formerly known as try_to_munlock()) on *each* page mapped from the file (the purpose being to find out whether another process has the page mlocked, so therefore it should not be unmlocked yet). Several optimizations have been made in the past: one is to skip page_mlock() when mapcount tells that nothing else has this page mapped; but that doesn't help at all when others do have it mapped. This time around, I initially intended to add a preliminary search of the rmap tree for overlapping VM_LOCKED ranges; but that gets messy with locking order, when in doubt whether a page is actually present; and risks adding even more contention on the i_mmap_rwsem. A solution would be much easier, if only there were space in struct page for an mlock_count... but actually, most of the time, there is space for it - an mlocked page spends most of its life on an unevictable LRU, but since 3.18 removed the scan_unevictable_pages sysctl, that "LRU" has been redundant. Let's try to reuse its page->lru. But leave that until a later patch: in this patch, clear the ground by removing page_mlock(), and all the infrastructure that has gathered around it - which mostly hinders understanding, and will make reviewing new additions harder. Don't mind those old comments about THPs, they date from before 4.5's refcounting rework: splitting is not a risk here. Just keep a minimal version of munlock_vma_page(), as reminder of what it should attend to (in particular, the odd way PGSTRANDED is counted out of PGMUNLOCKED), and likewise a stub for munlock_vma_pages_range(). Move unchanged __mlock_posix_error_return() out of the way, down to above its caller: this series then makes no further change after mlock_fixup(). After this and each following commit, the kernel builds, boots and runs; but with deficiencies which may show up in testing of mlock and munlock. 
The system calls succeed or fail as before, and mlock remains effective in preventing page reclaim; but meminfo's Unevictable and Mlocked amounts may be shown too low after mlock, grow, then stay too high after munlock: with previously mlocked pages remaining unevictable for too long, until finally unmapped and freed and counts corrected. Normal service will be resumed in "mm/munlock: mlock_pte_range() when mlocking or munlocking". Signed-off-by: Hugh Dickins Acked-by: Vlastimil Babka Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e704b1a4c06c..dc48aa8c2c94 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -237,12 +237,6 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int folio_mkclean(struct folio *); -/* - * called in munlock()/munmap() path to check for other vmas holding - * the page mlocked. - */ -void page_mlock(struct page *page); - void remove_migration_ptes(struct page *old, struct page *new, bool locked); /* -- cgit v1.2.3 From b67bf49ce7aae72f63739abee6ac25f64bf20081 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 14 Feb 2022 18:21:52 -0800 Subject: mm/munlock: delete FOLL_MLOCK and FOLL_POPULATE If counting page mlocks, we must not double-count: follow_page_pte() can tell if a page has already been Mlocked or not, but cannot tell if a pte has already been counted or not: that will have to be done when the pte is mapped in (which lru_cache_add_inactive_or_unevictable() already tracks for new anon pages, but there's no such tracking yet for others). Delete all the FOLL_MLOCK code - faulting in the missing pages will do all that is necessary, without special mlock_vma_page() calls from here. But then FOLL_POPULATE turns out to serve no purpose - it was there so that its absence would tell faultin_page() not to faultin page when setting up VM_LOCKONFAULT areas; but if there's no special work needed here for mlock, then there's no work at all here for VM_LOCKONFAULT. Have I got that right? I've not looked into the history, but see that FOLL_POPULATE goes back before VM_LOCKONFAULT: did it serve a different purpose before? Ah, yes, it was used to skip the old stack guard page. And is it intentional that COW is not broken on existing pages when setting up a VM_LOCKONFAULT area? I can see that being argued either way, and have no reason to disagree with current behaviour. 
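To make the flag removal concrete, here is a paraphrased before/after sketch of the populate path (simplified, not the literal mm/gup.c hunks; example_gup() stands in for the real GUP call and is hypothetical):

/* Before (paraphrased): mlock work piggybacked on GUP flags. */
static long example_populate_old(struct vm_area_struct *vma, unsigned long nr_pages)
{
	unsigned int gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;

	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;	/* mark only, don't pre-fault */
	return example_gup(vma, gup_flags, nr_pages);	/* hypothetical helper */
}

/* After (paraphrased): plain faulting does everything that is needed. */
static long example_populate_new(struct vm_area_struct *vma, unsigned long nr_pages)
{
	if (vma->vm_flags & VM_LOCKONFAULT)
		return nr_pages;	/* fault-on-demand: nothing to pre-fault */
	return example_gup(vma, FOLL_TOUCH, nr_pages);	/* hypothetical helper */
}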
Signed-off-by: Hugh Dickins Acked-by: Vlastimil Babka Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 213cc569b192..74ee50c2033b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2925,13 +2925,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ -#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ -- cgit v1.2.3 From cea86fe246b694a191804b47378eb9d77aefabec Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 14 Feb 2022 18:26:39 -0800 Subject: mm/munlock: rmap call mlock_vma_page() munlock_vma_page() Add vma argument to mlock_vma_page() and munlock_vma_page(), make them inline functions which check (vma->vm_flags & VM_LOCKED) before calling mlock_page() and munlock_page() in mm/mlock.c. Add bool compound to mlock_vma_page() and munlock_vma_page(): this is because we have understandable difficulty in accounting pte maps of THPs, and if passed a PageHead page, mlock_page() and munlock_page() cannot tell whether it's a pmd map to be counted or a pte map to be ignored. Add vma arg to page_add_file_rmap() and page_remove_rmap(), like the others, and use that to call mlock_vma_page() at the end of the page adds, and munlock_vma_page() at the end of page_remove_rmap() (end or beginning? unimportant, but end was easier for assertions in testing). No page lock is required (although almost all adds happen to hold it): delete the "Serialize with page migration" BUG_ON(!PageLocked(page))s. Certainly page lock did serialize with page migration, but I'm having difficulty explaining why that was ever important. Mlock accounting on THPs has been hard to define, differed between anon and file, involved PageDoubleMap in some places and not others, required clear_page_mlock() at some points. Keep it simple now: just count the pmds and ignore the ptes, there is no reason for ptes to undo pmd mlocks. page_add_new_anon_rmap() callers unchanged: they have long been calling lru_cache_add_inactive_or_unevictable(), which does its own VM_LOCKED handling (it also checks for not VM_SPECIAL: I think that's overcautious, and inconsistent with other checks, that mmap_region() already prevents VM_LOCKED on VM_SPECIAL; but haven't quite convinced myself to change it). 
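The shape of the new wrappers, roughly (a simplified sketch of the inline helpers this change describes; the real ones live in mm/internal.h and carry extra checks, for example against VM_SPECIAL vmas):

static inline void mlock_vma_page(struct page *page,
				  struct vm_area_struct *vma, bool compound)
{
	/* Count pmd maps of THPs, ignore their pte maps. */
	if (unlikely(vma->vm_flags & VM_LOCKED) &&
	    (compound || !PageTransCompound(page)))
		mlock_page(page);
}

munlock_vma_page() mirrors this, calling munlock_page() instead; mlock_vma_page() is called at the end of the page_add_*_rmap() variants and munlock_vma_page() at the end of page_remove_rmap(), as described above.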
Signed-off-by: Hugh Dickins Acked-by: Vlastimil Babka Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index dc48aa8c2c94..ac29b076082b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -167,18 +167,19 @@ struct anon_vma *page_get_anon_vma(struct page *page); */ void page_move_anon_rmap(struct page *, struct vm_area_struct *); void page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, bool); + unsigned long address, bool compound); void do_page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, int); + unsigned long address, int flags); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, bool); -void page_add_file_rmap(struct page *, bool); -void page_remove_rmap(struct page *, bool); - + unsigned long address, bool compound); +void page_add_file_rmap(struct page *, struct vm_area_struct *, + bool compound); +void page_remove_rmap(struct page *, struct vm_area_struct *, + bool compound); void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long); + unsigned long address); void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long); + unsigned long address); static inline void page_dup_rmap(struct page *page, bool compound) { -- cgit v1.2.3 From 07ca760673088f262da57ff42c15558688565aa2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 14 Feb 2022 18:29:54 -0800 Subject: mm/munlock: maintain page->mlock_count while unevictable Previous patches have been preparatory: now implement page->mlock_count. The ordering of the "Unevictable LRU" is of no significance, and there is no point holding unevictable pages on a list: place page->mlock_count to overlay page->lru.prev (since page->lru.next is overlaid by compound_head, which needs to be even so as not to satisfy PageTail - though 2 could be added instead of 1 for each mlock, if that's ever an improvement). But it's only safe to rely on or modify page->mlock_count while lruvec lock is held and page is on unevictable "LRU" - we can save lots of edits by continuing to pretend that there's an imaginary LRU here (there is an unevictable count which still needs to be maintained, but not a list). The mlock_count technique suffers from an unreliability much like with page_mlock(): while someone else has the page off LRU, not much can be done. As before, err on the safe side (behave as if mlock_count 0), and let try_to_unlock_one() move the page to unevictable if reclaim finds out later on - a few misplaced pages don't matter, what we want to avoid is imbalancing reclaim by flooding evictable lists with unevictable pages. I am not a fan of "if (!isolate_lru_page(page)) putback_lru_page(page);": if we have taken lruvec lock to get the page off its present list, then we save everyone trouble (and however many extra atomic ops) by putting it on its destination list immediately. 
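For orientation, the access pattern this implies looks roughly like the sketch below (hedged, not the literal mm/mlock.c code; the helper name is made up):

/* Sketch only: mlock_count overlays folio->lru.prev, so it may be read or
 * modified only while the lruvec lock holds the folio on the unevictable
 * "LRU". */
static void example_count_mlock(struct folio *folio)
{
	struct lruvec *lruvec = folio_lruvec_lock_irq(folio);

	if (folio_test_lru(folio) && folio_test_unevictable(folio))
		folio->mlock_count++;

	unlock_page_lruvec_irq(lruvec);
}

If the folio happens to be off the LRU at that moment, the count is simply left alone, which matches the err-on-the-safe-side behaviour described above.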
Signed-off-by: Hugh Dickins Acked-by: Vlastimil Babka Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm_inline.h | 11 ++++++++--- include/linux/mm_types.h | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index b725839dfe71..884d6f6af05b 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -99,7 +99,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); - list_add(&folio->lru, &lruvec->lists[lru]); + if (lru != LRU_UNEVICTABLE) + list_add(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list(struct page *page, @@ -115,6 +116,7 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); + /* This is not expected to be used on LRU_UNEVICTABLE */ list_add_tail(&folio->lru, &lruvec->lists[lru]); } @@ -127,8 +129,11 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { - list_del(&folio->lru); - update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + enum lru_list lru = folio_lru_list(folio); + + if (lru != LRU_UNEVICTABLE) + list_del(&folio->lru); + update_lru_size(lruvec, lru, folio_zonenum(folio), -folio_nr_pages(folio)); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5140e5feb486..475bdb282769 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -85,7 +85,16 @@ struct page { * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ - struct list_head lru; + union { + struct list_head lru; + /* Or, for the Unevictable "LRU list" slot */ + struct { + /* Always even, to negate PageTail */ + void *__filler; + /* Count page's or folio's mlocks */ + unsigned int mlock_count; + }; + }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; pgoff_t index; /* Our offset within mapping. */ @@ -241,7 +250,13 @@ struct folio { struct { /* public: */ unsigned long flags; - struct list_head lru; + union { + struct list_head lru; + struct { + void *__filler; + unsigned int mlock_count; + }; + }; struct address_space *mapping; pgoff_t index; void *private; -- cgit v1.2.3 From 4fe6a63077a6d3c143d68f6b96e4051f1d0740ac Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 17 Feb 2022 14:27:55 +0100 Subject: ASoC: SOF: Replace zero-length array with flexible-array member There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use "flexible array members"[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Link: https://github.com/KSPP/linux/issues/180 Suggested-by: Gustavo A. R. Silva Signed-off-by: Stephen Kitt Acked-by: Pierre-Louis Bossart Reviewed-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20220217132755.1786130-1-steve@sk2.org Signed-off-by: Mark Brown --- include/sound/sof/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sof/topology.h b/include/sound/sof/topology.h index d12736e14b69..adee6afd1490 100644 --- a/include/sound/sof/topology.h +++ b/include/sound/sof/topology.h @@ -237,7 +237,7 @@ struct sof_ipc_comp_process { /* reserved for future use */ uint32_t reserved[7]; - uint8_t data[0]; + uint8_t data[]; } __packed; /* frees components, buffers and pipelines -- cgit v1.2.3 From d95d6320ba7a51d61c097ffc3bcafcf70283414e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Feb 2022 09:32:17 -0800 Subject: ipv6: fix data-race in fib6_info_hw_flags_set / fib6_purge_rt Because fib6_info_hw_flags_set() is called without any synchronization, all accesses to gi6->offload, fi->trap and fi->offload_failed need some basic protection like READ_ONCE()/WRITE_ONCE(). BUG: KCSAN: data-race in fib6_info_hw_flags_set / fib6_purge_rt read to 0xffff8881087d5886 of 1 bytes by task 13953 on cpu 0: fib6_drop_pcpu_from net/ipv6/ip6_fib.c:1007 [inline] fib6_purge_rt+0x4f/0x580 net/ipv6/ip6_fib.c:1033 fib6_del_route net/ipv6/ip6_fib.c:1983 [inline] fib6_del+0x696/0x890 net/ipv6/ip6_fib.c:2028 __ip6_del_rt net/ipv6/route.c:3876 [inline] ip6_del_rt+0x83/0x140 net/ipv6/route.c:3891 __ipv6_dev_ac_dec+0x2b5/0x370 net/ipv6/anycast.c:374 ipv6_dev_ac_dec net/ipv6/anycast.c:387 [inline] __ipv6_sock_ac_close+0x141/0x200 net/ipv6/anycast.c:207 ipv6_sock_ac_close+0x79/0x90 net/ipv6/anycast.c:220 inet6_release+0x32/0x50 net/ipv6/af_inet6.c:476 __sock_release net/socket.c:650 [inline] sock_close+0x6c/0x150 net/socket.c:1318 __fput+0x295/0x520 fs/file_table.c:280 ____fput+0x11/0x20 fs/file_table.c:313 task_work_run+0x8e/0x110 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:175 [inline] exit_to_user_mode_prepare+0x160/0x190 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:300 do_syscall_64+0x50/0xd0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff8881087d5886 of 1 bytes by task 1912 on cpu 1: fib6_info_hw_flags_set+0x155/0x3b0 net/ipv6/route.c:6230 nsim_fib6_rt_hw_flags_set drivers/net/netdevsim/fib.c:668 [inline] nsim_fib6_rt_add drivers/net/netdevsim/fib.c:691 [inline] nsim_fib6_rt_insert drivers/net/netdevsim/fib.c:756 [inline] nsim_fib6_event drivers/net/netdevsim/fib.c:853 [inline] nsim_fib_event drivers/net/netdevsim/fib.c:886 [inline] nsim_fib_event_work+0x284f/0x2cf0 drivers/net/netdevsim/fib.c:1477 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x22 -> 0x2a Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1912 Comm: kworker/1:3 Not tainted 5.16.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events nsim_fib_event_work Fixes: 0c5fcf9e249e ("IPv6: Add "offload failed" indication to routes") Fixes: bb3c4ab93e44 ("ipv6: Add "offload" and "trap" indications to routes") Signed-off-by: Eric Dumazet Cc: Amit Cohen Cc: Ido Schimmel Reported-by: syzbot Link: https://lore.kernel.org/r/20220216173217.3792411-2-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- 
include/net/ip6_fib.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 40ae8f1b18e5..2048bc8748cb 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -190,14 +190,16 @@ struct fib6_info { u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; + + u8 offload; + u8 trap; + u8 offload_failed; + u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, - offload:1, - trap:1, - offload_failed:1, - unused:1; + unused:4; struct rcu_head rcu; struct nexthop *nh; -- cgit v1.2.3 From 874bfdfa4735cbb1b0d6e0c6157c712a312371a1 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Feb 2022 14:42:09 -0500 Subject: drm/amdgpu: add gc 10.3.6 support this patch adds gc 10.3.6 support. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 55fa660e35f4..1d65c1fbc4ec 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) -- cgit v1.2.3 From 904b10fb189cc15376e9bfce1ef0282e68b0b004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 14 Feb 2022 12:41:08 +0100 Subject: PCI: Add defines for normal and subtractive PCI bridges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add these PCI class codes to pci_ids.h: PCI_CLASS_BRIDGE_PCI_NORMAL PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE Use these defines in all kernel code for describing PCI class codes for normal and subtractive PCI bridges. [bhelgaas: similar change in pci-mvebu.c] Link: https://lore.kernel.org/r/20220214114109.26809-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aad54c666407..130949c3b486 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -60,6 +60,8 @@ #define PCI_CLASS_BRIDGE_EISA 0x0602 #define PCI_CLASS_BRIDGE_MC 0x0603 #define PCI_CLASS_BRIDGE_PCI 0x0604 +#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400 +#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401 #define PCI_CLASS_BRIDGE_PCMCIA 0x0605 #define PCI_CLASS_BRIDGE_NUBUS 0x0606 #define PCI_CLASS_BRIDGE_CARDBUS 0x0607 -- cgit v1.2.3 From d2b1d186ce2eac6b15d31db3e2750ee8e02bbe81 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Feb 2022 21:37:26 +0200 Subject: net: dsa: delete unused exported symbols for ethtool PHY stats Introduced in commit cf963573039a ("net: dsa: Allow providing PHY statistics from CPU port"), it appears these were never used. 
Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220216193726.2926320-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 1456313a1faa..c8626dec970c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1257,9 +1257,6 @@ static inline bool dsa_slave_dev_check(const struct net_device *dev) #endif netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); -int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); -int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); -int dsa_port_get_phy_sset_count(struct dsa_port *dp); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); struct dsa_tag_driver { -- cgit v1.2.3 From 091296d30917f6e63a9402974f546412dfb2a8e6 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sat, 5 Feb 2022 11:21:36 +0200 Subject: iwlwifi: mvm: refactor setting PPE thresholds in STA_HE_CTXT_CMD We are setting the PPE Thresholds in STA_HE_CTXT_CMD according to HE PHY Capabilities IE. As EHT is introduced, we will have to set this thresholds according to EHT PHY Capabilities IE if we're in an EHT connection. Some parts of the code can be used for both HE and EHT. Put this parts in functions which will be used in the patch which adds support for EHT PPE. Signed-off-by: Miri Korenblit Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20220205112029.48a508dfffef.If392e44d88f96ebed7fadf827e327194d4bd97b1@changeid Signed-off-by: Luca Coelho --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a4143466cb32..75d40acb60c1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2421,6 +2421,7 @@ ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK 0x78 #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS (3) #define IEEE80211_PPE_THRES_INFO_PPET_SIZE (3) +#define IEEE80211_HE_PPE_THRES_INFO_HEADER_SIZE (7) /* * Calculate 802.11ax HE capabilities IE PPE field size -- cgit v1.2.3 From 8467fadc115cb08bb1cbc7885cb7b7ef1871cae4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 17 Feb 2022 10:07:55 +0200 Subject: net: gro: Fix a 'directive in macro's argument list' sparse warning Following the cited commit, sparse started complaining about: ../include/net/gro.h:58:1: warning: directive in macro's argument list ../include/net/gro.h:59:1: warning: directive in macro's argument list Fix that by moving the defines out of the struct_group() macro. Fixes: de5a1f3ce4c8 ("net: gro: minor optimization for dev_gro_receive()") Reviewed-by: Maxim Mikityanskiy Signed-off-by: Gal Pressman Acked-by: Alexander Lobakin Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/gro.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index a765fedda5c4..867656b0739c 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -35,6 +35,9 @@ struct napi_gro_cb { /* jiffies when first packet was created/queued */ unsigned long age; +/* Used in napi_gro_cb::free */ +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 /* portion of the cb set to zero at every gro iteration */ struct_group(zeroed, @@ -55,8 +58,6 @@ struct napi_gro_cb { /* Free the skb? 
*/ u8 free:2; -#define NAPI_GRO_FREE 1 -#define NAPI_GRO_FREE_STOLEN_HEAD 2 /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; -- cgit v1.2.3 From a1cdec57e03a1352e92fbbe7974039dda4efcec0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Feb 2022 09:05:02 -0800 Subject: net-timestamp: convert sk->sk_tskey to atomic_t UDP sendmsg() can be lockless, this is causing all kinds of data races. This patch converts sk->sk_tskey to remove one of these races. BUG: KCSAN: data-race in __ip_append_data / __ip_append_data read to 0xffff8881035d4b6c of 4 bytes by task 8877 on cpu 1: __ip_append_data+0x1c1/0x1de0 net/ipv4/ip_output.c:994 ip_make_skb+0x13f/0x2d0 net/ipv4/ip_output.c:1636 udp_sendmsg+0x12bd/0x14c0 net/ipv4/udp.c:1249 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff8881035d4b6c of 4 bytes by task 8880 on cpu 0: __ip_append_data+0x1d8/0x1de0 net/ipv4/ip_output.c:994 ip_make_skb+0x13f/0x2d0 net/ipv4/ip_output.c:1636 udp_sendmsg+0x12bd/0x14c0 net/ipv4/udp.c:1249 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000054d -> 0x0000054e Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 8880 Comm: syz-executor.5 Not tainted 5.17.0-rc2-syzkaller-00167-gdcb85f85fa6f-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 09c2d251b707 ("net-timestamp: add key to disambiguate concurrent datagrams") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. 
Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ff9b508d9c5f..50aecd28b355 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,7 +507,7 @@ struct sock { #endif u16 sk_tsflags; u8 sk_shutdown; - u32 sk_tskey; + atomic_t sk_tskey; atomic_t sk_zckey; u8 sk_clockid; @@ -2667,7 +2667,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = sk->sk_tskey++; + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; -- cgit v1.2.3 From bde018222c6b084ac32933a9f933581dd83da18e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Feb 2022 18:30:35 +0000 Subject: net: dsa: add support for phylink mac_select_pcs() Add DSA support for the phylink mac_select_pcs() method so DSA drivers can return provide phylink with the appropriate PCS for the PHY interface mode. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index c8626dec970c..bc6eef6af810 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -788,6 +788,9 @@ struct dsa_switch_ops { void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); + struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, + int port, + phy_interface_t iface); int (*phylink_mac_link_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); void (*phylink_mac_config)(struct dsa_switch *ds, int port, -- cgit v1.2.3 From 6de74d1069b821e96460d0fc2edfc35785db04fb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 9 Feb 2022 08:11:10 -0800 Subject: hv_utils: Add comment about max VMbus packet size in VSS driver The VSS driver allocates a VMbus receive buffer significantly larger than sizeof(hv_vss_msg), with no explanation. To help prevent future mistakes, add a #define and comment about why this is done. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1644423070-75125-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index daf82a230c0e..aaa502a7bff4 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -90,6 +90,17 @@ struct hv_vss_check_dm_info { __u32 flags; } __attribute__((packed)); +/* + * struct hv_vss_msg encodes the fields that the Linux VSS + * driver accesses. However, FREEZE messages from Hyper-V contain + * additional LUN information that Linux doesn't use and are not + * represented in struct hv_vss_msg. A received FREEZE message may + * be as large as 6,260 bytes, so the driver must allocate at least + * that much space, not sizeof(struct hv_vss_msg). Other messages + * such as AUTO_RECOVER may be as large as 12,500 bytes. However, + * because the Linux VSS driver responds that it doesn't support + * auto-recovery, it should not receive such messages. 
+ */ struct hv_vss_msg { union { struct hv_vss_hdr vss_hdr; -- cgit v1.2.3 From 988f0a9045b0058a43ccee764a671dfab81e6d15 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 12:59:52 +0200 Subject: soc: fsl: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Li Yang --- include/soc/fsl/dpaa2-fd.h | 3 ++- include/soc/fsl/qe/immap_qe.h | 3 ++- include/soc/fsl/qe/qe_tdm.h | 4 +++- include/soc/fsl/qe/ucc_fast.h | 2 +- include/soc/fsl/qe/ucc_slow.h | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/dpaa2-fd.h b/include/soc/fsl/dpaa2-fd.h index 90ae8d191f1a..bae490cac0aa 100644 --- a/include/soc/fsl/dpaa2-fd.h +++ b/include/soc/fsl/dpaa2-fd.h @@ -7,7 +7,8 @@ #ifndef __FSL_DPAA2_FD_H #define __FSL_DPAA2_FD_H -#include +#include +#include /** * DOC: DPAA2 FD - Frame Descriptor APIs for DPAA2 diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h index 7614fee532f1..edd601f53f5d 100644 --- a/include/soc/fsl/qe/immap_qe.h +++ b/include/soc/fsl/qe/immap_qe.h @@ -13,7 +13,8 @@ #define _ASM_POWERPC_IMMAP_QE_H #ifdef __KERNEL__ -#include +#include + #include #define QE_IMMAP_SIZE (1024 * 1024) /* 1MB from 1MB+IMMR */ diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h index b6febe225071..43ea830cfe1f 100644 --- a/include/soc/fsl/qe/qe_tdm.h +++ b/include/soc/fsl/qe/qe_tdm.h @@ -10,8 +10,8 @@ #ifndef _QE_TDM_H_ #define _QE_TDM_H_ -#include #include +#include #include #include @@ -19,6 +19,8 @@ #include #include +struct device_node; + /* SI RAM entries */ #define SIR_LAST 0x0001 #define SIR_BYTE 0x0002 diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index 9696a5b9b5d1..ad60b87a3c69 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -10,7 +10,7 @@ #ifndef __UCC_FAST_H__ #define __UCC_FAST_H__ -#include +#include #include #include diff --git a/include/soc/fsl/qe/ucc_slow.h b/include/soc/fsl/qe/ucc_slow.h index 11a216e4e919..7548ce8a202d 100644 --- a/include/soc/fsl/qe/ucc_slow.h +++ b/include/soc/fsl/qe/ucc_slow.h @@ -11,7 +11,7 @@ #ifndef __UCC_SLOW_H__ #define __UCC_SLOW_H__ -#include +#include #include #include -- cgit v1.2.3 From ec87cf3782f7b05ae15e061d36c763c35488f292 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Wed, 2 Feb 2022 23:58:21 +0300 Subject: ata: libata: make ata_host_suspend() *void* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ata_host_suspend() always returns 0, so the result checks in many drivers look pointless. Let's make this function return *void* instead of *int*. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. 
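For illustration only (this sketch is not taken from the patch; the foo_* names and the platform_device plumbing are invented), a typical suspend callback in a driver collapses like this once the return value is gone:

/* Before: the always-zero result still had to be propagated. */
static int foo_ata_suspend(struct platform_device *pdev, pm_message_t mesg)
{
	struct ata_host *host = platform_get_drvdata(pdev);

	return ata_host_suspend(host, mesg);
}

/* After: ata_host_suspend() is void, so the wrapper just returns 0. */
static int foo_ata_suspend(struct platform_device *pdev, pm_message_t mesg)
{
	struct ata_host *host = platform_get_drvdata(pdev);

	ata_host_suspend(host, mesg);
	return 0;
}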
Signed-off-by: Sergey Shtylyov Acked-by: Uwe Kleine-König Reviewed-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 605756f645be..a49e75c4206d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1080,7 +1080,7 @@ extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM -extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); +extern void ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); extern void ata_sas_port_suspend(struct ata_port *ap); extern void ata_sas_port_resume(struct ata_port *ap); -- cgit v1.2.3 From 47f0bd5032106469827cf56c8b45bb9101112105 Mon Sep 17 00:00:00 2001 From: Jacques de Laval Date: Thu, 17 Feb 2022 16:02:02 +0100 Subject: net: Add new protocol attribute to IP addresses This patch adds a new protocol attribute to IPv4 and IPv6 addresses. Inspiration was taken from the protocol attribute of routes. User space applications like iproute2 can set/get the protocol with the Netlink API. The attribute is stored as an 8-bit unsigned integer. The protocol attribute is set by kernel for these categories: - IPv4 and IPv6 loopback addresses - IPv6 addresses generated from router announcements - IPv6 link local addresses User space may pass custom protocols, not defined by the kernel. Grouping addresses on their origin is useful in scenarios where you want to distinguish between addresses based on who added them, e.g. kernel vs. user space. Tagging addresses with a string label is an existing feature that could be used as a solution. Unfortunately the max length of a label is 15 characters, and for compatibility reasons the label must be prefixed with the name of the device followed by a colon. Since device names also have a max length of 15 characters, only -1 characters is guaranteed to be available for any origin tag, which is not that much. 
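Purely as an illustration (this snippet is not part of the patch), the attribute is a plain u8, so a user-space dump tool only needs a small lookup for the kernel-defined origins; the constant values below mirror the uapi defines added further down in if_addr.h, and anything else is application-defined:

#include <stdio.h>

#define IFAPROT_UNSPEC		0
#define IFAPROT_KERNEL_LO	1	/* loopback */
#define IFAPROT_KERNEL_RA	2	/* from router announcement */
#define IFAPROT_KERNEL_LL	3	/* link-local */

static const char *ifa_proto_name(unsigned char proto)
{
	switch (proto) {
	case IFAPROT_KERNEL_LO:	return "kernel_lo";
	case IFAPROT_KERNEL_RA:	return "kernel_ra";
	case IFAPROT_KERNEL_LL:	return "kernel_ll";
	case IFAPROT_UNSPEC:	return "unspec";
	default:		return "user-defined";
	}
}

int main(void)
{
	printf("%d -> %s\n", 2, ifa_proto_name(2));	/* prints "2 -> kernel_ra" */
	return 0;
}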
A reference implementation of user space setting and getting protocols is available for iproute2: https://github.com/westermo/iproute2/commit/9a6ea18bd79f47f293e5edc7780f315ea42ff540 Signed-off-by: Jacques de Laval Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220217150202.80802-1-Jacques.De.Laval@westermo.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 1 + include/net/addrconf.h | 2 ++ include/net/if_inet6.h | 2 ++ include/uapi/linux/if_addr.h | 9 ++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 674aeead6260..ead323243e7b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -150,6 +150,7 @@ struct in_ifaddr { __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; + unsigned char ifa_proto; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 59940e230b78..f7506f08e505 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -64,6 +64,8 @@ struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; + u8 ifa_proto; + const struct in6_addr *peer_pfx; u32 rt_priority; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f026cf08a8e8..4cfdef6ca4f6 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -71,6 +71,8 @@ struct inet6_ifaddr { bool tokenized; + u8 ifa_proto; + struct rcu_head rcu; struct in6_addr peer_addr; }; diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dfcf3ce0097f..1c392dd95a5e 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -33,8 +33,9 @@ enum { IFA_CACHEINFO, IFA_MULTICAST, IFA_FLAGS, - IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ __IFA_MAX, }; @@ -69,4 +70,10 @@ struct ifa_cacheinfo { #define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) #endif +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + #endif -- cgit v1.2.3 From cb196b725936f6b776ad1d073f66fbe92aa798fa Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 18 Feb 2022 12:25:53 +0800 Subject: mctp: replace mctp_address_ok with more fine-grained helpers Currently, we have mctp_address_ok(), which checks if an EID is in the "valid" range of 8-254 inclusive. However, 0 and 255 may also be valid addresses, depending on context. 0 is the NULL EID, which may be set when physical addressing is used. 255 is valid as a destination address for broadcasts. This change renames mctp_address_ok to mctp_address_unicast, and adds similar helpers for broadcast and null EIDs, which will be used in an upcoming commit. 
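As an illustration only (the classifier function below is invented and not part of the patch; the helpers themselves are the ones added to include/net/mctp.h in the diff), callers can now name the special EIDs instead of open-coding range checks:

#include <net/mctp.h>	/* mctp_eid_t and the new address helpers */

static int mctp_example_classify(mctp_eid_t eid)
{
	if (mctp_address_null(eid))
		return 0;	/* EID 0: only meaningful with physical addressing */
	if (mctp_address_broadcast(eid))
		return 1;	/* EID 255: broadcast destination */
	if (mctp_address_unicast(eid))
		return 2;	/* EIDs 8-254: normal assignable endpoint IDs */
	return -1;		/* EIDs 1-7 are reserved */
}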
Signed-off-by: Jeremy Kerr Signed-off-by: Jakub Kicinski --- include/net/mctp.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index e80a4baf8379..d37268fe6825 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -40,11 +40,21 @@ struct mctp_hdr { #define MCTP_INITIAL_DEFAULT_NET 1 -static inline bool mctp_address_ok(mctp_eid_t eid) +static inline bool mctp_address_unicast(mctp_eid_t eid) { return eid >= 8 && eid < 255; } +static inline bool mctp_address_broadcast(mctp_eid_t eid) +{ + return eid == 255; +} + +static inline bool mctp_address_null(mctp_eid_t eid) +{ + return eid == 0; +} + static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) { return match == eid || match == MCTP_ADDR_ANY; -- cgit v1.2.3 From b1e8206582f9d680cff7d04828708c8b6ab32957 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Feb 2022 10:16:57 +0100 Subject: sched: Fix yet more sched_fork() races Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") fixed a fork race vs cgroup, it opened up a race vs syscalls by not placing the task on the runqueue before it gets exposed through the pidhash. Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is trying to fix a single instance of this, instead fix the whole class of issues, effectively reverting this commit. Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tadeusz Struk Tested-by: Zhang Qiao Tested-by: Dietmar Eggemann Link: https://lkml.kernel.org/r/YgoeCbwj5mbCR0qA@hirez.programming.kicks-ass.net --- include/linux/sched/task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index b9198a1b3a84..e84e54d1b490 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p, - struct kernel_clone_args *kargs); +extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); -- cgit v1.2.3 From 8a69fe0be143b0a1af829f85f0e9a1ae7d6a04db Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:11 +0000 Subject: sched/preempt: Refactor sched_dynamic_update() Currently sched_dynamic_update needs to open-code the enabled/disabled function names for each preemption model it supports, when in practice this is a boolean enabled/disabled state for each function. Make this clearer and avoid repetition by defining the enabled/disabled states at the function definition, and using helper macros to perform the static_call_update(). Where x86 currently overrides the enabled function, it is made to provide both the enabled and disabled states for consistency, with defaults provided by the core code otherwise. In subsequent patches this will allow us to support PREEMPT_DYNAMIC without static calls. There should be no functional change as a result of this patch. 
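A rough sketch of the resulting pattern (the helper macro names here are illustrative; only the *_dynamic_enabled/*_dynamic_disabled suffixes come from the diffs below): each preemption function provides enabled/disabled aliases next to its definition, and the update side reduces to token pasting:

#define preempt_dynamic_enable(f) \
	static_call_update(f, f##_dynamic_enabled)
#define preempt_dynamic_disable(f) \
	static_call_update(f, f##_dynamic_disabled)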
Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-3-mark.rutland@arm.com --- include/linux/entry-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2e2b8d6140ed..a01ac1a0a292 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -456,6 +456,8 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); */ void irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC +#define irqentry_exit_cond_resched_dynamic_enabled irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_disabled NULL DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); #endif -- cgit v1.2.3 From 4624a14f4daa8ab4578d274555fd8847254ce339 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:12 +0000 Subject: sched/preempt: Simplify irqentry_exit_cond_resched() callers Currently callers of irqentry_exit_cond_resched() need to be aware of whether the function should be indirected via a static call, leading to ugly ifdeffery in callers. Save them the hassle with a static inline wrapper that does the right thing. The raw_irqentry_exit_cond_resched() will also be useful in subsequent patches which will add conditional wrappers for preemption functions. Note: in arch/x86/entry/common.c, xen_pv_evtchn_do_upcall() always calls irqentry_exit_cond_resched() directly, even when PREEMPT_DYNAMIC is in use. I believe this is a latent bug (which this patch corrects), but I'm not entirely certain this wasn't deliberate. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-4-mark.rutland@arm.com --- include/linux/entry-common.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a01ac1a0a292..dfd84c59b144 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -454,11 +454,14 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); * * Conditional reschedule with additional sanity checks. */ -void irqentry_exit_cond_resched(void); +void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC -#define irqentry_exit_cond_resched_dynamic_enabled irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched #define irqentry_exit_cond_resched_dynamic_disabled NULL -DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() +#else +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() #endif /** -- cgit v1.2.3 From 99cf983cc8bca4adb461b519664c939a565cfd4d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:14 +0000 Subject: sched/preempt: Add PREEMPT_DYNAMIC using static keys Where an architecture selects HAVE_STATIC_CALL but not HAVE_STATIC_CALL_INLINE, each static call has an out-of-line trampoline which will either branch to a callee or return to the caller. 
On such architectures, a number of constraints can conspire to make those trampolines more complicated and potentially less useful than we'd like. For example: * Hardware and software control flow integrity schemes can require the addition of "landing pad" instructions (e.g. `BTI` for arm64), which will also be present at the "real" callee. * Limited branch ranges can require that trampolines generate or load an address into a register and perform an indirect branch (or at least have a slow path that does so). This loses some of the benefits of having a direct branch. * Interaction with SW CFI schemes can be complicated and fragile, e.g. requiring that we can recognise idiomatic codegen and remove indirections understand, at least until clang proves more helpful mechanisms for dealing with this. For PREEMPT_DYNAMIC, we don't need the full power of static calls, as we really only need to enable/disable specific preemption functions. We can achieve the same effect without a number of the pain points above by using static keys to fold early returns into the preemption functions themselves rather than in an out-of-line trampoline, effectively inlining the trampoline into the start of the function. For arm64, this results in good code generation. For example, the dynamic_cond_resched() wrapper looks as follows when enabled. When disabled, the first `B` is replaced with a `NOP`, resulting in an early return. | : | bti c | b // or `nop` | mov w0, #0x0 | ret | mrs x0, sp_el0 | ldr x0, [x0, #8] | cbnz x0, | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret ... compared to the regular form of the function: | <__cond_resched>: | bti c | mrs x0, sp_el0 | ldr x1, [x0, #8] | cbz x1, <__cond_resched+0x18> | mov w0, #0x0 | ret | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret Any architecture which implements static keys should be able to use this to implement PREEMPT_DYNAMIC with similar cost to non-inlined static calls. Since this is likely to have greater overhead than (inlined) static calls, PREEMPT_DYNAMIC is only defaulted to enabled when HAVE_PREEMPT_DYNAMIC_CALL is selected. 
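A simplified sketch of the static-key flavour described above (the kernel generates this through helper macros rather than open-coding each wrapper, so the body below is illustrative):

DEFINE_STATIC_KEY_TRUE(sk_dynamic_cond_resched);

int dynamic_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_cond_resched))
		return 0;		/* key disabled: folded early return */
	return __cond_resched();	/* key enabled: real implementation */
}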
Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-6-mark.rutland@arm.com --- include/linux/entry-common.h | 10 ++++++++-- include/linux/kernel.h | 7 ++++++- include/linux/sched.h | 10 +++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index dfd84c59b144..141952f4fee8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -456,13 +456,19 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); */ void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched #define irqentry_exit_cond_resched_dynamic_disabled NULL DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); #define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() -#else -#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() #endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ /** * irqentry_exit - Handle return from exception that used irqentry_enter() diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 33f47a996513..a890428bcc1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -99,7 +99,7 @@ struct user; extern int __cond_resched(void); # define might_resched() __cond_resched() -#elif defined(CONFIG_PREEMPT_DYNAMIC) +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) extern int __cond_resched(void); @@ -110,6 +110,11 @@ static __always_inline void might_resched(void) static_call_mod(might_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) + +extern int dynamic_might_resched(void); +# define might_resched() dynamic_might_resched() + #else # define might_resched() do { } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..de03ddeb064b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2020,7 +2020,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); -#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) DECLARE_STATIC_CALL(cond_resched, __cond_resched); @@ -2029,6 +2029,14 @@ static __always_inline int _cond_resched(void) return static_call_mod(cond_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +extern int dynamic_cond_resched(void); + +static __always_inline int _cond_resched(void) +{ + return dynamic_cond_resched(); +} + #else static inline int _cond_resched(void) -- cgit v1.2.3 From 4b340a5a726dafba15b366c4009aa0a8f77631ac Mon Sep 17 00:00:00 2001 From: Mobashshera Rasool Date: Thu, 17 Feb 2022 07:46:40 +0000 Subject: net: ip6mr: add support for passing full packet on wrong mif This patch adds support for MRT6MSG_WRMIFWHOLE which is used to pass full packet 
and real vif id when the incoming interface is wrong. While the RP and FHR are setting up state we need to be sending the registers encapsulated with all the data inside otherwise we lose it. The RP then decapsulates it and forwards it to the interested parties. Currently with WRONGMIF we can only be sending empty register packets and will lose that data. This behaviour can be enabled by using MRT_PIM with val == MRT6MSG_WRMIFWHOLE. This doesn't prevent MRT6MSG_WRONGMIF from happening, it happens in addition to it, also it is controlled by the same throttling parameters as WRONGMIF (i.e. 1 packet per 3 seconds currently). Both messages are generated to keep backwards compatibily and avoid breaking someone who was enabling MRT_PIM with val == 4, since any positive val is accepted and treated the same. Signed-off-by: Mobashshera Rasool Signed-off-by: David S. Miller --- include/uapi/linux/mroute6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index a1fd6173e2db..1d90c21a6251 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -134,6 +134,7 @@ struct mrt6msg { #define MRT6MSG_NOCACHE 1 #define MRT6MSG_WRONGMIF 2 #define MRT6MSG_WHOLEPKT 3 /* used for use level encap */ +#define MRT6MSG_WRMIFWHOLE 4 /* For PIM Register and assert processing */ __u8 im6_mbz; /* must be zero */ __u8 im6_msgtype; /* what type of message */ __u16 im6_mif; /* mif rec'd on */ -- cgit v1.2.3 From 5486f5bf790b5c664913076c3194b8f916a5c7ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 14:35:49 +0100 Subject: net: Force inlining of checksum functions in net/checksum.h All functions defined as static inline in net/checksum.h are meant to be inlined for performance reason. But since commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to uninline functions when it wants. Fair enough in the general case, but for tiny performance critical checksum helpers that's counter-productive. The problem mainly arises when selecting CONFIG_CC_OPTIMISE_FOR_SIZE, Those helpers being 'static inline' in header files you suddenly find them duplicated many times in the resulting vmlinux. Here is a typical exemple when building powerpc pmac32_defconfig with CONFIG_CC_OPTIMISE_FOR_SIZE. csum_sub() appears 4 times: c04a23cc : c04a23cc: 7c 84 20 f8 not r4,r4 c04a23d0: 7c 63 20 14 addc r3,r3,r4 c04a23d4: 7c 63 01 94 addze r3,r3 c04a23d8: 4e 80 00 20 blr ... c04a2ce8: 4b ff f6 e5 bl c04a23cc ... c04a2d2c: 4b ff f6 a1 bl c04a23cc ... c04a2d54: 4b ff f6 79 bl c04a23cc ... c04a754c : c04a754c: 7c 84 20 f8 not r4,r4 c04a7550: 7c 63 20 14 addc r3,r3,r4 c04a7554: 7c 63 01 94 addze r3,r3 c04a7558: 4e 80 00 20 blr ... c04ac930: 4b ff ac 1d bl c04a754c ... c04ad264: 4b ff a2 e9 bl c04a754c ... c04e3b08 : c04e3b08: 7c 84 20 f8 not r4,r4 c04e3b0c: 7c 63 20 14 addc r3,r3,r4 c04e3b10: 7c 63 01 94 addze r3,r3 c04e3b14: 4e 80 00 20 blr ... c04e5788: 4b ff e3 81 bl c04e3b08 ... c04e65c8: 4b ff d5 41 bl c04e3b08 ... c0512d34 : c0512d34: 7c 84 20 f8 not r4,r4 c0512d38: 7c 63 20 14 addc r3,r3,r4 c0512d3c: 7c 63 01 94 addze r3,r3 c0512d40: 4e 80 00 20 blr ... c0512dfc: 4b ff ff 39 bl c0512d34 ... c05138bc: 4b ff f4 79 bl c0512d34 ... 
Restore the expected behaviour by using __always_inline for all functions defined in net/checksum.h vmlinux size is even reduced by 256 bytes with this patch: text data bss dec hex filename 6980022 2515362 194384 9689768 93daa8 vmlinux.before 6979862 2515266 194384 9689512 93d9a8 vmlinux.now Fixes: ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Andrew Morton Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- include/net/checksum.h | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 5218041e5c8f..02d0c2d01014 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { @@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY -static inline __wsum +static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); @@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum csum_shift(__wsum sum, int offset) +static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ if (offset & 1) @@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) return sum; } -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { return csum_add(csum, csum_shift(csum2, offset)); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) 
+static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -136,7 +137,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } @@ -150,16 +151,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; @@ -175,12 +176,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } -static inline __wsum wsum_negate(__wsum val) +static __always_inline __wsum wsum_negate(__wsum val) { return (__force __wsum)-((__force u32)val); } -- cgit v1.2.3 From ccfbf44d4c7fb7c64cf79b3f2a5ae522e5165878 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 19 Feb 2022 11:47:17 +0000 Subject: net: dsa: remove pcs_poll With drivers converted over to using phylink PCS, there is no need for the struct dsa_switch member "pcs_poll" to exist anymore - there is a flag in the struct phylink_pcs which indicates whether this PCS needs to be polled which supersedes this. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/net/dsa.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index bc6eef6af810..f13de2d8aef3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -391,11 +391,6 @@ struct dsa_switch { */ u32 vlan_filtering:1; - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - u32 pcs_poll:1; - /* For switches that only have the MRU configurable. To ensure the * configured MTU is not exceeded, normalization of MRU on all bridged * interfaces is needed. 
-- cgit v1.2.3 From 64b4a0f8b51b20e0c9dbff7748365994364d5f01 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 19 Feb 2022 11:47:22 +0000 Subject: net: phylink: remove phylink_config's pcs_poll phylink_config's pcs_poll is no longer used, let's get rid of it. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index cca149f78d35..9ef9b7047f19 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -86,7 +86,6 @@ enum phylink_op_type { * @type: operation type of PHYLINK instance * @legacy_pre_march2020: driver has not been updated for March 2020 updates * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") - * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND @@ -100,7 +99,6 @@ struct phylink_config { struct device *dev; enum phylink_op_type type; bool legacy_pre_march2020; - bool pcs_poll; bool poll_fixed_state; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, -- cgit v1.2.3 From 1d6049a3b14feb39c8d6c8f538a711dcc54f88e0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:30 +0800 Subject: scsi: libsas: Use enum for response frame DATAPRES field As defined in table 126 of the SAS spec 1.1, use an enum for the DATAPRES field, which makes reading the code easier. Also change sas_ssp_task_response() to use a switch statement, which is more suitable (than if-else), as suggested by Christoph. Link: https://lore.kernel.org/r/1645112566-115804-3-git-send-email-john.garry@huawei.com Suggested-by: Xiang Chen Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Jack Wang Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/sas.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 64154c1fed02..332a463d08ef 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -191,6 +191,13 @@ enum sas_gpio_reg_type { SAS_GPIO_REG_TX_GP = 4, }; +/* Response frame DATAPRES field */ +enum { + SAS_DATAPRES_NO_DATA = 0, + SAS_DATAPRES_RESPONSE_DATA = 1, + SAS_DATAPRES_SENSE_DATA = 2, +}; + struct dev_to_host_fis { u8 fis_type; /* 0x34 */ u8 flags; -- cgit v1.2.3 From 25882c82f850e3e972a973e0af310b3e58de38fd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:31 +0800 Subject: scsi: libsas: Delete lldd_clear_aca callback This callback is never called, so remove support. Link: https://lore.kernel.org/r/1645112566-115804-4-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Jack Wang Reviewed-by: Christoph Hellwig Reviewed-by: Xiang Chen Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index fad328d3a551..8026c1bb57ba 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -636,7 +636,6 @@ struct sas_domain_function_template { /* Task Management Functions. Must be called from process context. 
*/ int (*lldd_abort_task)(struct sas_task *); int (*lldd_abort_task_set)(struct domain_device *, u8 *lun); - int (*lldd_clear_aca)(struct domain_device *, u8 *lun); int (*lldd_clear_task_set)(struct domain_device *, u8 *lun); int (*lldd_I_T_nexus_reset)(struct domain_device *); int (*lldd_ata_check_ready)(struct domain_device *); -- cgit v1.2.3 From 2dd6801a671c0a82e756355d20669ad5bbe89073 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:32 +0800 Subject: scsi: libsas: Delete SAS_SG_ERR No LLDD sets exec status as SAS_SG_ERR, so remove support. Link: https://lore.kernel.org/r/1645112566-115804-5-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 8026c1bb57ba..cd2b2b67bf93 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -492,7 +492,6 @@ enum exec_status { SAS_INTERRUPTED, SAS_QUEUE_FULL, SAS_DEVICE_UNKNOWN, - SAS_SG_ERR, SAS_OPEN_REJECT, SAS_OPEN_TO, SAS_PROTO_RESPONSE, -- cgit v1.2.3 From bbfe82cdbaf84e6622ceb6f3447c8c4bb7dde7ab Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:35 +0800 Subject: scsi: libsas: Add struct sas_tmf_task Some of the LLDDs which use libsas have their own definition of a struct to hold TMF info, so add a common struct for libsas. Also add an interim force phy id field for hisi_sas driver, which will be removed once the STP "TMF" code is factored out. Even though some LLDDs (pm8001) use a u32 for the tag, u16 will be adequate, as that named driver only uses tags in range [0, 1024). Link: https://lore.kernel.org/r/1645112566-115804-8-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index cd2b2b67bf93..7a55853fad7b 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -576,6 +576,15 @@ struct sas_ssp_task { struct scsi_cmnd *cmd; }; +struct sas_tmf_task { + u8 tmf; + u16 tag_of_task_to_be_managed; + + /* Temp */ + int force_phy; + int phy_id; +}; + struct sas_task { struct domain_device *dev; -- cgit v1.2.3 From 96e54376a8b27066d32ca36800318c43e6b6d2c5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:36 +0800 Subject: scsi: libsas: Add sas_task.tmf Add a pointer to a sas_tmf_task to the sas_task struct, as this will be used when the common LLDD TMF code is factored out. Also set it for the LLDDs to store per-sas_task TMF info. Link: https://lore.kernel.org/r/1645112566-115804-9-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 7a55853fad7b..9c181ebccfee 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -610,6 +610,7 @@ struct sas_task { void *lldd_task; /* for use by LLDDs */ void *uldd_task; struct sas_task_slow *slow_task; + struct sas_tmf_task *tmf; }; struct sas_task_slow { -- cgit v1.2.3 From 2037a340314f4be8977563006476bd15c859eda2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:39 +0800 Subject: scsi: libsas: Add TMF handler exec complete callback The pm8001 TMF handler has some special processing when the TMF completes, so add a callback and fill it in for the pm8001 driver. Link: https://lore.kernel.org/r/1645112566-115804-12-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 9c181ebccfee..dd6551e809a0 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -652,6 +652,9 @@ struct sas_domain_function_template { int (*lldd_lu_reset)(struct domain_device *, u8 *lun); int (*lldd_query_task)(struct sas_task *); + /* Special TMF callbacks */ + void (*lldd_tmf_exec_complete)(struct domain_device *dev); + /* Port and Adapter management */ int (*lldd_clear_nexus_port)(struct asd_sas_port *); int (*lldd_clear_nexus_ha)(struct sas_ha_struct *); -- cgit v1.2.3 From 693e66a0a6ac56322687f614ba6e8bfbc43a1530 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:40 +0800 Subject: scsi: libsas: Add TMF handler aborted callback The hisi_sas and pm8001 TMF handlers have some special processing for when the TMF is aborted, so add a callback and fill it in for those drivers. Link: https://lore.kernel.org/r/1645112566-115804-13-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index dd6551e809a0..c44de478e314 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -654,6 +654,7 @@ struct sas_domain_function_template { /* Special TMF callbacks */ void (*lldd_tmf_exec_complete)(struct domain_device *dev); + void (*lldd_tmf_aborted)(struct sas_task *task); /* Port and Adapter management */ int (*lldd_clear_nexus_port)(struct asd_sas_port *); -- cgit v1.2.3 From 69b80a0ed0b5d0c54ee1618eb5a015699e8c47c5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:41 +0800 Subject: scsi: libsas: Add sas_abort_task_set() Add a generic implementation of abort task set TMF handler, and use in LLDDs. Link: https://lore.kernel.org/r/1645112566-115804-14-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index c44de478e314..53fdc18bdd09 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -722,6 +722,8 @@ struct sas_phy *sas_get_local_phy(struct domain_device *dev); int sas_request_addr(struct Scsi_Host *shost, u8 *addr); +int sas_abort_task_set(struct domain_device *dev, u8 *lun); + int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, gfp_t gfp_flags); int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event, -- cgit v1.2.3 From e8585452953a040a6d1d901e5b2e8c327f09e219 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:42 +0800 Subject: scsi: libsas: Add sas_clear_task_set() Add a generic implementation of clear task set TMF handler, and use in LLDDs. Link: https://lore.kernel.org/r/1645112566-115804-15-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 53fdc18bdd09..f71a47740ff8 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -723,6 +723,7 @@ struct sas_phy *sas_get_local_phy(struct domain_device *dev); int sas_request_addr(struct Scsi_Host *shost, u8 *addr); int sas_abort_task_set(struct domain_device *dev, u8 *lun); +int sas_clear_task_set(struct domain_device *dev, u8 *lun); int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, gfp_t gfp_flags); -- cgit v1.2.3 From 29d7769055a21968c0bbfe866affe1640d90bd1d Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:43 +0800 Subject: scsi: libsas: Add sas_lu_reset() Add a generic implementation of LU reset TMF handler, and use in LLDDs. Link: https://lore.kernel.org/r/1645112566-115804-16-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f71a47740ff8..7b1e2e7f5a6c 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -724,6 +724,7 @@ int sas_request_addr(struct Scsi_Host *shost, u8 *addr); int sas_abort_task_set(struct domain_device *dev, u8 *lun); int sas_clear_task_set(struct domain_device *dev, u8 *lun); +int sas_lu_reset(struct domain_device *dev, u8 *lun); int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, gfp_t gfp_flags); -- cgit v1.2.3 From 72f8810e1fdcd52deedfd294497fa8337703a632 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:44 +0800 Subject: scsi: libsas: Add sas_query_task() Add a generic implementation of query task TMF handler, and use in LLDDs. Link: https://lore.kernel.org/r/1645112566-115804-17-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 7b1e2e7f5a6c..bf8613fb1c4e 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -725,6 +725,7 @@ int sas_request_addr(struct Scsi_Host *shost, u8 *addr); int sas_abort_task_set(struct domain_device *dev, u8 *lun); int sas_clear_task_set(struct domain_device *dev, u8 *lun); int sas_lu_reset(struct domain_device *dev, u8 *lun); +int sas_query_task(struct sas_task *task, u16 tag); int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, gfp_t gfp_flags); -- cgit v1.2.3 From 4fea759edfa795b170a72bfd3be7b7601012ce4b Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 17 Feb 2022 23:42:45 +0800 Subject: scsi: libsas: Add sas_abort_task() Add a generic implementation of abort task TMF handler, and use in LLDDs. With that, some LLDDs custom TMF functions can now be deleted. Link: https://lore.kernel.org/r/1645112566-115804-18-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index bf8613fb1c4e..4ea964d33600 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -726,6 +726,7 @@ int sas_abort_task_set(struct domain_device *dev, u8 *lun); int sas_clear_task_set(struct domain_device *dev, u8 *lun); int sas_lu_reset(struct domain_device *dev, u8 *lun); int sas_query_task(struct sas_task *task, u16 tag); +int sas_abort_task(struct sas_task *task, u16 tag); int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, gfp_t gfp_flags); -- cgit v1.2.3 From efcef265fd83d9a68a68926abecb3e1dd3e260a8 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 15 Feb 2022 21:49:26 +0300 Subject: ata: add/use ata_taskfile::{error|status} fields Add the explicit error and status register fields to 'struct ata_taskfile' using the anonymous *union*s ('struct ide_taskfile' had that for ages!) and update the libata taskfile code accordingly. There should be no object code changes resulting from that... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index a49e75c4206d..0619ae462ecd 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -518,7 +518,10 @@ struct ata_taskfile { u8 hob_lbam; u8 hob_lbah; - u8 feature; + union { + u8 error; + u8 feature; + }; u8 nsect; u8 lbal; u8 lbam; @@ -526,7 +529,10 @@ struct ata_taskfile { u8 device; - u8 command; /* IO operation */ + union { + u8 status; + u8 command; + }; u32 auxiliary; /* auxiliary field */ /* from SATA 3.1 and */ -- cgit v1.2.3 From b1a5983f56e371046dcf164f90bfaf704d2b89f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 17 Feb 2022 23:41:20 +0100 Subject: netfilter: nf_tables_offload: incorrect flow offload action array size immediate verdict expression needs to allocate one slot in the flow offload action array, however, immediate data expression does not need to do so. fwd and dup expression need to allocate one slot, this is missing. Add a new offload_action interface to report if this expression needs to allocate one slot in the flow offload action array. 
Fixes: be2861dc36d7 ("netfilter: nft_{fwd,dup}_netdev: add offload support") Reported-and-tested-by: Nick Gregory Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/net/netfilter/nf_tables_offload.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index eaf55da9a205..c4c0861deac1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -905,9 +905,9 @@ struct nft_expr_ops { int (*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); + bool (*offload_action)(const struct nft_expr *expr); void (*offload_stats)(struct nft_expr *expr, const struct flow_stats *stats); - u32 offload_flags; const struct nft_expr_type *type; void *data; }; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index f9d95ff82df8..797147843958 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,8 +67,6 @@ struct nft_flow_rule { struct flow_rule *rule; }; -#define NFT_OFFLOAD_F_ACTION (1 << 0) - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); -- cgit v1.2.3 From cccd73d607fee52f35b4b030408fa5f6c21ef503 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:32 -0800 Subject: iosys-map: Add offset to iosys_map_memcpy_to() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In certain situations it's useful to be able to write to an offset of the mapping. Add a dst_offset to iosys_map_memcpy_to(). Cc: Sumit Semwal Cc: Christian König Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Christian König Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-2-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index f4186f91caa6..edd730b1e899 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -220,22 +220,23 @@ static inline void iosys_map_clear(struct iosys_map *map) } /** - * iosys_map_memcpy_to - Memcpy into iosys mapping + * iosys_map_memcpy_to - Memcpy into offset of iosys_map * @dst: The iosys_map structure + * @dst_offset: The offset from which to copy * @src: The source buffer * @len: The number of byte in src * - * Copies data into a iosys mapping. The source buffer is in system - * memory. Depending on the buffer's location, the helper picks the correct - * method of accessing the memory. + * Copies data into a iosys_map with an offset. The source buffer is in + * system memory. Depending on the buffer's location, the helper picks the + * correct method of accessing the memory. 
*/ -static inline void iosys_map_memcpy_to(struct iosys_map *dst, const void *src, - size_t len) +static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, + const void *src, size_t len) { if (dst->is_iomem) - memcpy_toio(dst->vaddr_iomem, src, len); + memcpy_toio(dst->vaddr_iomem + dst_offset, src, len); else - memcpy(dst->vaddr, src, len); + memcpy(dst->vaddr + dst_offset, src, len); } /** -- cgit v1.2.3 From e62f25e8b3cdd29224c27938addba817aedd4b54 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:33 -0800 Subject: iosys-map: Add a few more helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First the simplest ones: - iosys_map_memset(): when abstracting system and I/O memory, just like the memcpy() use case, memset() also has dedicated functions to be called for using IO memory. - iosys_map_memcpy_from(): we may need to copy data from I/O memory, not only to. In certain situations it's useful to be able to read or write to an offset that is calculated by having the memory layout given by a struct declaration. Usually we are going to read/write a u8, u16, u32 or u64. As a pre-requisite for the implementation, add iosys_map_memcpy_from() to be the equivalent of iosys_map_memcpy_to(), but in the other direction. Then add 2 pairs of macros: - iosys_map_rd() / iosys_map_wr() - iosys_map_rd_field() / iosys_map_wr_field() The first pair takes the C-type and offset to read/write. The second pair uses a struct describing the layout of the mapping in order to calculate the offset and size being read/written. We could use readb, readw, readl, readq and the write* counterparts, however due to alignment issues this may not work on all architectures. If alignment needs to be checked to call the right function, it's not possible to decide at compile-time which function to call: so just leave the decision to the memcpy function that will do exactly that. Finally, in order to use the above macros with a map derived from another, add another initializer: IOSYS_MAP_INIT_OFFSET(). v2: - Rework IOSYS_MAP_INIT_OFFSET() so it doesn't rely on aliasing rules within the union - Add offset to both iosys_map_rd_field() and iosys_map_wr_field() to allow the struct itself to be at an offset from the mapping - Add documentation to iosys_map_rd_field() with example and expected memory layout v3: - Drop kernel.h include as it's not needed anymore Cc: Sumit Semwal Cc: Christian König Cc: Thomas Zimmermann Cc: Mauro Carvalho Chehab Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Matt Atwood Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-3-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 201 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) (limited to 'include') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index edd730b1e899..e69a002d5aa4 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -120,6 +120,45 @@ struct iosys_map { .is_iomem = false, \ } +/** + * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map + * @map_: The dma-buf mapping structure to copy from + * @offset_: Offset to add to the other mapping + * + * Initializes a new iosys_map struct based on another passed as argument. 
It + * does a shallow copy of the struct so it's possible to update the back storage + * without changing where the original map points to. It is the equivalent of + * doing: + * + * .. code-block:: c + * + * iosys_map map = other_map; + * iosys_map_incr(&map, &offset); + * + * Example usage: + * + * .. code-block:: c + * + * void foo(struct device *dev, struct iosys_map *base_map) + * { + * ... + * struct iosys_map map = IOSYS_MAP_INIT_OFFSET(base_map, FIELD_OFFSET); + * ... + * } + * + * The advantage of using the initializer over just increasing the offset with + * iosys_map_incr() like above is that the new map will always point to the + * right place of the buffer during its scope. It reduces the risk of updating + * the wrong part of the buffer and having no compiler warning about that. If + * the assignment to IOSYS_MAP_INIT_OFFSET() is forgotten, the compiler can warn + * about the use of uninitialized variable. + */ +#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \ + struct iosys_map copy = *map_; \ + iosys_map_incr(©, offset_); \ + copy; \ +}) + /** * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory * @map: The iosys_map structure @@ -239,6 +278,26 @@ static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, memcpy(dst->vaddr + dst_offset, src, len); } +/** + * iosys_map_memcpy_from - Memcpy from iosys_map into system memory + * @dst: Destination in system memory + * @src: The iosys_map structure + * @src_offset: The offset from which to copy + * @len: The number of byte in src + * + * Copies data from a iosys_map with an offset. The dest buffer is in + * system memory. Depending on the mapping location, the helper picks the + * correct method of accessing the memory. + */ +static inline void iosys_map_memcpy_from(void *dst, const struct iosys_map *src, + size_t src_offset, size_t len) +{ + if (src->is_iomem) + memcpy_fromio(dst, src->vaddr_iomem + src_offset, len); + else + memcpy(dst, src->vaddr + src_offset, len); +} + /** * iosys_map_incr - Increments the address stored in a iosys mapping * @map: The iosys_map structure @@ -255,4 +314,146 @@ static inline void iosys_map_incr(struct iosys_map *map, size_t incr) map->vaddr += incr; } +/** + * iosys_map_memset - Memset iosys_map + * @dst: The iosys_map structure + * @offset: Offset from dst where to start setting value + * @value: The value to set + * @len: The number of bytes to set in dst + * + * Set value in iosys_map. Depending on the buffer's location, the helper + * picks the correct method of accessing the memory. + */ +static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + if (dst->is_iomem) + memset_io(dst->vaddr_iomem + offset, value, len); + else + memset(dst->vaddr + offset, value, len); +} + +/** + * iosys_map_rd - Read a C-type value from the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from which to read + * @type__: Type of the value being read + * + * Read a C type value from iosys_map, handling possible un-aligned accesses to + * the mapping. + * + * Returns: + * The value read from the mapping. 
+ */ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val; \ + iosys_map_memcpy_from(&val, map__, offset__, sizeof(val)); \ + val; \ +}) + +/** + * iosys_map_wr - Write a C-type value to the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from the mapping to write to + * @type__: Type of the value being written + * @val__: Value to write + * + * Write a C-type value to the iosys_map, handling possible un-aligned accesses + * to the mapping. + */ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val = (val__); \ + iosys_map_memcpy_to(map__, offset__, &val, sizeof(val)); \ +}) + +/** + * iosys_map_rd_field - Read a member from a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * + * Read a value from iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and its + * value read handling possible un-aligned memory accesses. For example: suppose + * there is a @struct foo defined as below and the value ``foo.field2.inner2`` + * needs to be read from the iosys_map: + * + * .. code-block:: c + * + * struct foo { + * int field1; + * struct { + * int inner1; + * int inner2; + * } field2; + * int field3; + * } __packed; + * + * This is the expected memory layout of a buffer using iosys_map_rd_field(): + * + * +------------------------------+--------------------------+ + * | Address | Content | + * +==============================+==========================+ + * | buffer + 0000 | start of mmapped buffer | + * | | pointed by iosys_map | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + ``struct_offset__`` | start of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + wwww | ``foo.field2.inner2`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + yyyy | end of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + zzzz | end of mmaped buffer | + * +------------------------------+--------------------------+ + * + * Values automatically calculated by this macro or not needed are denoted by + * wwww, yyyy and zzzz. This is the code to read that value: + * + * .. code-block:: c + * + * x = iosys_map_rd_field(&map, offset, struct foo, field2.inner2); + * + * Returns: + * The value read from the mapping. 
+ */ +#define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \ + struct_type__ *s; \ + iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__)); \ +}) + +/** + * iosys_map_wr_field - Write to a member of a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * @val__: Value to write + * + * Write a value to the iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and the + * @val__ is written handling possible un-aligned memory accesses. Refer to + * iosys_map_rd_field() for expected usage and memory layout. + */ +#define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct_type__ *s; \ + iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__), val__); \ +}) + #endif /* __IOSYS_MAP_H__ */ -- cgit v1.2.3 From caa574ffc4aaf4f29b890223878c63e2e7772f62 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Sat, 19 Feb 2022 00:17:49 +0530 Subject: drm/i915/uapi: document behaviour for DG2 64K support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. v4: Kdoc modification. v3: fix typos and less emphasis v2: Fixed suggestions on formatting [Daniel] Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Acked-by: Jordan Justen Reviewed-by: Ramalingam C Reviewed-by: Thomas Hellström cc: Simon Ser cc: Pekka Paalanen Cc: Jordan Justen Cc: Kenneth Graunke Cc: mesa-dev@lists.freedesktop.org Cc: Tony Ye Cc: Slawomir Milczarek Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-13-ramalingam.c@intel.com --- include/uapi/drm/i915_drm.h | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 914ebd9290e5..05c3642aaece 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 { /** * When the EXEC_OBJECT_PINNED flag is specified this is populated by * the user with the GTT offset at which this object will be pinned. + * * When the I915_EXEC_NO_RELOC flag is specified this must contain the * presumed_offset of the object. + * * During execbuffer2 the kernel populates it with the value of the * current GTT offset of the object, for future presumed_offset writes. + * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. */ __u64 offset; @@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * - * Note that for some devices we have might have further minimum - * page-size restrictions(larger than 4K), like for device local-memory. 
- * However in general the final size here should always reflect any - * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS - * extension to place the object in device local-memory. + * + * DG2 64K min page size implications: + * + * On discrete platforms, starting from DG2, we have to contend with GTT + * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE + * objects. Specifically the hardware only supports 64K or larger GTT + * page sizes for such memory. The kernel will already ensure that all + * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page + * sizes underneath. + * + * Note that the returned size here will always reflect any required + * rounding up done by the kernel, i.e 4K will now become 64K on devices + * such as DG2. + * + * Special DG2 GTT address alignment requirement: + * + * The GTT alignment will also need to be at least 2M for such objects. + * + * Note that due to how the hardware implements 64K GTT page support, we + * have some further complications: + * + * 1) The entire PDE (which covers a 2MB virtual address range), must + * contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same + * PDE is forbidden by the hardware. + * + * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM + * objects. + * + * To keep things simple for userland, we mandate that any GTT mappings + * must be aligned to and rounded up to 2MB. The kernel will internally + * pad them out to the next 2MB boundary. As this only wastes virtual + * address space and avoids userland having to copy any needlessly + * complicated PDE sharing scheme (coloring) and only affects DG2, this + * is deemed to be a good compromise. */ __u64 size; /** -- cgit v1.2.3 From 643b622b51f1f0015e0a80f90b4ef9032e6ddb1b Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 20 Feb 2022 15:06:32 +0800 Subject: net: tcp: add skb drop reasons to tcp_v{4,6}_inbound_md5_hash() Pass the address of drop reason to tcp_v4_inbound_md5_hash() and tcp_v6_inbound_md5_hash() to store the reasons for skb drops when this function fails. Therefore, the drop reason can be passed to kfree_skb_reason() when the skb needs to be freed. Following drop reasons are added: SKB_DROP_REASON_TCP_MD5NOTFOUND SKB_DROP_REASON_TCP_MD5UNEXPECTED SKB_DROP_REASON_TCP_MD5FAILURE SKB_DROP_REASON_TCP_MD5* above correspond to LINUX_MIB_TCPMD5* Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++++ include/trace/events/skb.h | 4 ++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a5adbf6b51e8..46678eb587ff 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -346,6 +346,18 @@ enum skb_drop_reason { * udp packet drop out of * udp_memory_allocated. 
*/ + SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one + * expected, corresponding + * to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not + * expecting one, corresponding + * to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, + * corresponding to + * LINUX_MIB_TCPMD5FAILURE + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index cfcfd26399f7..46c06b0be850 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -27,6 +27,10 @@ EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ + EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ + EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ + TCP_MD5UNEXPECTED) \ + EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 7a26dc9e7b43f5a24c4b843713e728582adf1c38 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 20 Feb 2022 15:06:33 +0800 Subject: net: tcp: add skb drop reasons to tcp_add_backlog() Pass the address of drop_reason to tcp_add_backlog() to store the reasons for skb drops when fails. Following drop reasons are introduced: SKB_DROP_REASON_SOCKET_BACKLOG Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/net/tcp.h | 3 ++- include/trace/events/skb.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46678eb587ff..f7f33c79945b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -358,6 +358,10 @@ enum skb_drop_reason { * corresponding to * LINUX_MIB_TCPMD5FAILURE */ + SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket + * backlog (see + * LINUX_MIB_TCPBACKLOGDROP) + */ SKB_DROP_REASON_MAX, }; diff --git a/include/net/tcp.h b/include/net/tcp.h index eff2487d972d..04f4650e0ff0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1367,7 +1367,8 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); #ifdef CONFIG_INET void __sk_defer_free_flush(struct sock *sk); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 46c06b0be850..bfccd77e9071 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -31,6 +31,7 @@ EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ TCP_MD5UNEXPECTED) \ EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ + EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 2a968ef60e1fac4e694d9f60ce19a3b66b40e8c3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 20 Feb 2022 15:06:35 +0800 Subject: net: tcp: use tcp_drop_reason() for tcp_rcv_established() Replace tcp_drop() used in tcp_rcv_established() with tcp_drop_reason(). Following drop reasons are added: SKB_DROP_REASON_TCP_FLAGS Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f7f33c79945b..671db9f49efe 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -362,6 +362,7 @@ enum skb_drop_reason { * backlog (see * LINUX_MIB_TCPBACKLOGDROP) */ + SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index bfccd77e9071..d332e7313a61 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -32,6 +32,7 @@ TCP_MD5UNEXPECTED) \ EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ + EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From a7ec381049c0d1f03e342063d75f5c3b314d0ec2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 20 Feb 2022 15:06:36 +0800 Subject: net: tcp: use tcp_drop_reason() for tcp_data_queue() Replace tcp_drop() used in tcp_data_queue() with tcp_drop_reason(). Following drop reasons are introduced: SKB_DROP_REASON_TCP_ZEROWINDOW SKB_DROP_REASON_TCP_OLD_DATA SKB_DROP_REASON_TCP_OVERWINDOW SKB_DROP_REASON_TCP_OLD_DATA is used for the case that end_seq of skb less than the left edges of receive window. (Maybe there is a better name?) Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ include/trace/events/skb.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 671db9f49efe..554ef2c848ee 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -363,6 +363,19 @@ enum skb_drop_reason { * LINUX_MIB_TCPBACKLOGDROP) */ SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ + SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already + * received before (spurious retrans + * may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, + * the seq of the first byte exceed + * the right edges of receive + * window + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index d332e7313a61..cc1c8f7eaf72 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -33,6 +33,9 @@ EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ + EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ + EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ + EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From d25e481be0c519d1a458b14191dc8c2a8bb3e24a Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 20 Feb 2022 15:06:37 +0800 Subject: net: tcp: use tcp_drop_reason() for tcp_data_queue_ofo() Replace tcp_drop() used in tcp_data_queue_ofo with tcp_drop_reason(). Following drop reasons are introduced: SKB_DROP_REASON_TCP_OFOMERGE Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++++ include/trace/events/skb.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 554ef2c848ee..a3e90efe6586 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -376,6 +376,10 @@ enum skb_drop_reason { * the right edges of receive * window */ + SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in + * the ofo queue, corresponding to + * LINUX_MIB_TCPOFOMERGE + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index cc1c8f7eaf72..2ab7193313aa 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -36,6 +36,7 @@ EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ + EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From a1dc6308865df719efb2a2f8a5f0f5979602d267 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 21 Jan 2022 16:08:16 +1030 Subject: fsi: sbefifo: Implement FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl sets the read timeout (in seconds) for the response received by sbefifo device from sbe. The timeout affects only the read operation on current sbefifo device fd. Certain SBE operations can take long time to complete and the default timeout of 10 seconds might not be sufficient to start receiving response from SBE. In such cases, allow the timeout to be set to the maximum of 120 seconds. The kernel does not contain the definition of the various SBE operations, so we must expose an interface to userspace to set the timeout for the given operation. Signed-off-by: Amitay Isaacs Signed-off-by: Joel Stanley Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20220121053816.82253-3-joel@jms.id.au Signed-off-by: Joel Stanley --- include/uapi/linux/fsi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fsi.h b/include/uapi/linux/fsi.h index da577ecd90e7..b2f1977378c7 100644 --- a/include/uapi/linux/fsi.h +++ b/include/uapi/linux/fsi.h @@ -55,4 +55,18 @@ struct scom_access { #define FSI_SCOM_WRITE _IOWR('s', 0x02, struct scom_access) #define FSI_SCOM_RESET _IOW('s', 0x03, __u32) +/* + * /dev/sbefifo* ioctl interface + */ + +/** + * FSI_SBEFIFO_READ_TIMEOUT sets the read timeout for response from SBE. + * + * The read timeout is specified in seconds. The minimum value of read + * timeout is 10 seconds (default) and the maximum value of read timeout is + * 120 seconds. A read timeout of 0 will reset the value to the default of + * (10 seconds). + */ +#define FSI_SBEFIFO_READ_TIMEOUT_SECONDS _IOW('s', 0x00, __u32) + #endif /* _UAPI_LINUX_FSI_H */ -- cgit v1.2.3 From f2af60bb7ce2fa5397f401cbf65725d9c87329a4 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Mon, 7 Feb 2022 10:16:40 -0600 Subject: fsi: Add trace events in initialization path Add definitions for trace events to show the scanning flow. 
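For context, a hedged sketch of how the core scan path might emit these events; the helper functions shown are hypothetical and the real call sites live under drivers/fsi/, outside this include/ diff:

/* one compilation unit instantiates the tracepoints */
#define CREATE_TRACE_POINTS
#include <trace/events/fsi.h>

static int fsi_scan_one_sketch(struct fsi_master *master, int link,
			       uint32_t cfam_id)
{
	struct fsi_slave *slave;

	if (!cfam_id_looks_valid(cfam_id)) {		/* hypothetical check */
		trace_fsi_slave_invalid_cfam(master, link, cfam_id);
		return -ENODEV;
	}

	slave = fsi_slave_alloc_sketch(master, link);	/* hypothetical helper */
	if (!slave)
		return -ENOMEM;

	trace_fsi_slave_init(slave);
	return 0;
}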
Signed-off-by: Eddie James Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220207161640.35605-1-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- include/trace/events/fsi.h | 86 ++++++++++++++++++++++++++++++++ include/trace/events/fsi_master_aspeed.h | 12 +++++ 2 files changed, 98 insertions(+) (limited to 'include') diff --git a/include/trace/events/fsi.h b/include/trace/events/fsi.h index 9832cb8e0eb0..c9a72e8432b8 100644 --- a/include/trace/events/fsi.h +++ b/include/trace/events/fsi.h @@ -122,6 +122,92 @@ TRACE_EVENT(fsi_master_break, ) ); +TRACE_EVENT(fsi_slave_init, + TP_PROTO(const struct fsi_slave *slave), + TP_ARGS(slave), + TP_STRUCT__entry( + __field(int, master_idx) + __field(int, master_n_links) + __field(int, idx) + __field(int, link) + __field(int, chip_id) + __field(__u32, cfam_id) + __field(__u32, size) + ), + TP_fast_assign( + __entry->master_idx = slave->master->idx; + __entry->master_n_links = slave->master->n_links; + __entry->idx = slave->cdev_idx; + __entry->link = slave->link; + __entry->chip_id = slave->chip_id; + __entry->cfam_id = slave->cfam_id; + __entry->size = slave->size; + ), + TP_printk("fsi%d: idx:%d link:%d/%d cid:%d cfam:%08x %08x", + __entry->master_idx, + __entry->idx, + __entry->link, + __entry->master_n_links, + __entry->chip_id, + __entry->cfam_id, + __entry->size + ) +); + +TRACE_EVENT(fsi_slave_invalid_cfam, + TP_PROTO(const struct fsi_master *master, int link, uint32_t cfam_id), + TP_ARGS(master, link, cfam_id), + TP_STRUCT__entry( + __field(int, master_idx) + __field(int, master_n_links) + __field(int, link) + __field(__u32, cfam_id) + ), + TP_fast_assign( + __entry->master_idx = master->idx; + __entry->master_n_links = master->n_links; + __entry->link = link; + __entry->cfam_id = cfam_id; + ), + TP_printk("fsi%d: cfam:%08x link:%d/%d", + __entry->master_idx, + __entry->cfam_id, + __entry->link, + __entry->master_n_links + ) +); + +TRACE_EVENT(fsi_dev_init, + TP_PROTO(const struct fsi_device *dev), + TP_ARGS(dev), + TP_STRUCT__entry( + __field(int, master_idx) + __field(int, link) + __field(int, type) + __field(int, unit) + __field(int, version) + __field(__u32, addr) + __field(__u32, size) + ), + TP_fast_assign( + __entry->master_idx = dev->slave->master->idx; + __entry->link = dev->slave->link; + __entry->type = dev->engine_type; + __entry->unit = dev->unit; + __entry->version = dev->version; + __entry->addr = dev->addr; + __entry->size = dev->size; + ), + TP_printk("fsi%d: slv%d: t:%02x u:%02x v:%02x %08x@%08x", + __entry->master_idx, + __entry->link, + __entry->type, + __entry->unit, + __entry->version, + __entry->size, + __entry->addr + ) +); #endif /* _TRACE_FSI_H */ diff --git a/include/trace/events/fsi_master_aspeed.h b/include/trace/events/fsi_master_aspeed.h index a355ceacc33f..0fff873775f1 100644 --- a/include/trace/events/fsi_master_aspeed.h +++ b/include/trace/events/fsi_master_aspeed.h @@ -72,6 +72,18 @@ TRACE_EVENT(fsi_master_aspeed_opb_error, ) ); +TRACE_EVENT(fsi_master_aspeed_cfam_reset, + TP_PROTO(bool start), + TP_ARGS(start), + TP_STRUCT__entry( + __field(bool, start) + ), + TP_fast_assign( + __entry->start = start; + ), + TP_printk("%s", __entry->start ? 
"start" : "end") +); + #endif #include -- cgit v1.2.3 From 44a3918c8245ab10c6c9719dd12e7a8d291980d8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Feb 2022 11:49:08 -0800 Subject: x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting With unprivileged eBPF enabled, eIBRS (without retpoline) is vulnerable to Spectre v2 BHB-based attacks. When both are enabled, print a warning message and report it in the 'spectre_v2' sysfs vulnerabilities file. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner --- include/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fa517ae604ad..1f56806d8eb9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1793,6 +1793,11 @@ struct bpf_core_ctx { int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn); +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2012,6 +2017,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, { return NULL; } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, -- cgit v1.2.3 From fefee95488412796b293d28c948be6fce63d149b Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 14 Feb 2022 11:14:01 +0100 Subject: ALSA: hda: Add snd_hdac_ext_bus_link_at() helper This patch exposes a new helper to directly retrieve the link from the codec address, and makes use of this helper when retrieving the link from the codec name. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220214101404.4074026-2-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/hdaudio_ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 77123c3e4095..b0c8e4936168 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -28,6 +28,7 @@ void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *chip, bool enable, int index); int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus); +struct hdac_ext_link *snd_hdac_ext_bus_link_at(struct hdac_bus *bus, int addr); struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_bus *bus, const char *codec_name); -- cgit v1.2.3 From 595511a3ab80d9ec6649bbf22a99bee169026fd1 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 14 Feb 2022 11:14:02 +0100 Subject: ALSA: hda: Update and expose snd_hda_codec_device_init() With few changes, snd_hda_codec_device_init() can be re-used by ASoC drivers. While at it, provide kernel doc for the exposed function. 
Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220214101404.4074026-3-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 82d9daa17851..5e3cbcca42f0 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -306,6 +306,9 @@ struct hda_codec { /* * constructors */ +__printf(3, 4) struct hda_codec * +snd_hda_codec_device_init(struct hda_bus *bus, unsigned int codec_addr, + const char *fmt, ...); int snd_hda_codec_new(struct hda_bus *bus, struct snd_card *card, unsigned int codec_addr, struct hda_codec **codecp); int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, -- cgit v1.2.3 From 17e0c4cbb748fca764ee184c50b60b3a7b0e0dc7 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 14 Feb 2022 11:14:03 +0100 Subject: ALSA: hda: Update and expose codec register procedures With few changes, snd_hda_codec_register() and its unregister-counterpart can be re-used by ASoC drivers. While at it, provide kernel doc for the exposed functions. Due to ALSA-device vs ASoC-component organization differences, new 'snddev_managed' argument is specified allowing for better control over codec registration process. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220214101404.4074026-4-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 5e3cbcca42f0..f74abc13414f 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -312,9 +312,12 @@ snd_hda_codec_device_init(struct hda_bus *bus, unsigned int codec_addr, int snd_hda_codec_new(struct hda_bus *bus, struct snd_card *card, unsigned int codec_addr, struct hda_codec **codecp); int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, - unsigned int codec_addr, struct hda_codec *codec); + unsigned int codec_addr, struct hda_codec *codec, + bool snddev_managed); int snd_hda_codec_configure(struct hda_codec *codec); int snd_hda_codec_update_widgets(struct hda_codec *codec); +void snd_hda_codec_register(struct hda_codec *codec); +void snd_hda_codec_unregister(struct hda_codec *codec); /* * low level functions -- cgit v1.2.3 From bb682f7a91af08f1fdb669d3911978d7166a65d1 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 14 Feb 2022 11:14:04 +0100 Subject: ALSA: hda: Expose codec cleanup and power-save functions With few changes, snd_hda_codec_set_power_save() and snd_hda_codec_cleanup_for_unbind() can be re-used by ASoC drivers. While at it, provide kernel doc for the exposed functions. 
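A hedged sketch of how an ASoC component might use the newly exported helpers; the callback names, the dev_to_hda_codec() lookup and the millisecond delay value are illustrative assumptions:

static int my_hda_probe_sketch(struct snd_soc_component *component)
{
	struct hda_codec *codec = dev_to_hda_codec(component->dev);

	/* apply this component's own power-save delay (assumed to be in ms) */
	snd_hda_codec_set_power_save(codec, 2000);
	return 0;
}

static void my_hda_remove_sketch(struct snd_soc_component *component)
{
	struct hda_codec *codec = dev_to_hda_codec(component->dev);

	/* tear down codec state before the component is unbound */
	snd_hda_codec_cleanup_for_unbind(codec);
}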
Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220214101404.4074026-5-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index f74abc13414f..77426ff58338 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -318,6 +318,7 @@ int snd_hda_codec_configure(struct hda_codec *codec); int snd_hda_codec_update_widgets(struct hda_codec *codec); void snd_hda_codec_register(struct hda_codec *codec); void snd_hda_codec_unregister(struct hda_codec *codec); +void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec); /* * low level functions @@ -496,9 +497,11 @@ int hda_call_check_power_status(struct hda_codec *codec, hda_nid_t nid) #define snd_hda_power_down(codec) snd_hdac_power_down(&(codec)->core) #define snd_hda_power_down_pm(codec) snd_hdac_power_down_pm(&(codec)->core) #ifdef CONFIG_PM +void snd_hda_codec_set_power_save(struct hda_codec *codec, int delay); void snd_hda_set_power_save(struct hda_bus *bus, int delay); void snd_hda_update_power_acct(struct hda_codec *codec); #else +static inline void snd_hda_codec_set_power_save(struct hda_codec *codec, int delay) {} static inline void snd_hda_set_power_save(struct hda_bus *bus, int delay) {} #endif -- cgit v1.2.3 From 509853f9e1e7b1490dc79f735a5dbafc9298f40d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 11 Feb 2022 19:14:54 +0100 Subject: genirq: Provide generic_handle_irq_safe() Provide generic_handle_irq_safe() which can used from any context. Suggested-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Hans de Goede Reviewed-by: Oleksandr Natalenko Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20220211181500.1856198-2-bigeasy@linutronix.de --- include/linux/irqdesc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c56..a77584593f7d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* -- cgit v1.2.3 From 93dd04ab0b2b32ae6e70284afc764c577156658e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Feb 2022 14:13:58 +0100 Subject: slab: remove __alloc_size attribute from __kmalloc_track_caller Commit c37495d6254c ("slab: add __alloc_size attributes for better bounds checking") added __alloc_size attributes to a bunch of kmalloc function prototypes. Unfortunately the change to __kmalloc_track_caller seems to cause clang to generate broken code and the first time this is called when booting, the box will crash. While the compiler problems are being reworked and attempted to be solved [1], let's just drop the attribute to solve the issue now. Once it is resolved it can be added back. 
[1] https://github.com/ClangBuiltLinux/linux/issues/1599 Fixes: c37495d6254c ("slab: add __alloc_size attributes for better bounds checking") Cc: stable Cc: Kees Cook Cc: Daniel Micay Cc: Nick Desaulniers Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Andrew Morton Cc: Vlastimil Babka Cc: Nathan Chancellor Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Greg Kroah-Hartman Acked-by: Nick Desaulniers Acked-by: David Rientjes Acked-by: Kees Cook Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220218131358.3032912-1-gregkh@linuxfoundation.org --- include/linux/slab.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 37bde99b74af..5b6193fd8bd9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -660,8 +660,7 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) - __alloc_size(1); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) -- cgit v1.2.3 From 05976c5f3bff8f8b5230da4b39f7cd6dfba9943e Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 17 Feb 2022 13:12:31 +0000 Subject: firmware: arm_scmi: Support optional system wide atomic-threshold-us An SCMI agent can be configured system-wide with a well-defined atomic threshold: only SCMI synchronous command whose latency has been advertised by the SCMI platform to be lower or equal to this configured threshold will be considered for atomic operations, when requested and if supported by the underlying transport at all. Link: https://lore.kernel.org/r/20220217131234.50328-6-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9f895cb81818..fdf6bd83cc59 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -619,6 +619,8 @@ struct scmi_notify_ops { * be interested to know if they can assume SCMI * command transactions associated to this handle will * never sleep and act accordingly. + * An optional atomic threshold value could be returned + * where configured. * @notify_ops: pointer to set of notifications related operations */ struct scmi_handle { @@ -629,7 +631,8 @@ struct scmi_handle { (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, struct scmi_protocol_handle **ph); void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); - bool (*is_transport_atomic)(const struct scmi_handle *handle); + bool (*is_transport_atomic)(const struct scmi_handle *handle, + unsigned int *atomic_threshold); const struct scmi_notify_ops *notify_ops; }; -- cgit v1.2.3 From b7bd36f2e9430a58aefdc326f8e6653e9b000243 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 17 Feb 2022 13:12:32 +0000 Subject: firmware: arm_scmi: Add atomic support to clock protocol Introduce new _atomic variant for SCMI clock protocol operations related to enable disable operations: when an atomic operation is required the xfer poll_completion flag is set for that transaction. 
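To illustrate the intended use (a sketch under assumptions, not the actual SCMI clock driver code), a caller that may run in atomic context would pick the _atomic variant and fall back to the sleeping one otherwise:

static int scmi_clk_enable_sketch(const struct scmi_clk_proto_ops *clk_ops,
				  const struct scmi_protocol_handle *ph,
				  u32 clk_id, bool atomic_capable)
{
	/* in atomic context the transport polls for completion instead of sleeping */
	if (atomic_capable)
		return clk_ops->enable_atomic(ph, clk_id);

	return clk_ops->enable(ph, clk_id);
}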
Link: https://lore.kernel.org/r/20220217131234.50328-7-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index fdf6bd83cc59..306e576835f8 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -82,6 +82,9 @@ struct scmi_clk_proto_ops { u64 rate); int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); + int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); + int (*disable_atomic)(const struct scmi_protocol_handle *ph, + u32 clk_id); }; /** -- cgit v1.2.3 From 18f295b758b227857133f680a397a42e83c62f3f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 17 Feb 2022 13:12:33 +0000 Subject: firmware: arm_scmi: Add support for clock_enable_latency An SCMI platform can optionally advertise an enable latency typically associated with a specific clock resource: add support for parsing such optional message field and export such information in the usual publicly accessible clock descriptor. Link: https://lore.kernel.org/r/20220217131234.50328-8-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 306e576835f8..b87551f41f9f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -42,6 +42,7 @@ struct scmi_revision_info { struct scmi_clock_info { char name[SCMI_MAX_STR_SIZE]; + unsigned int enable_latency; bool rate_discrete; union { struct { -- cgit v1.2.3 From 0c51e12e218f20b7d976158fdc18019627326f7a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 19 Feb 2022 17:45:19 +0200 Subject: ipv4: Invalidate neighbour for broadcast address upon address addition In case user space sends a packet destined to a broadcast address when a matching broadcast route is not configured, the kernel will create a unicast neighbour entry that will never be resolved [1]. When the broadcast route is configured, the unicast neighbour entry will not be invalidated and continue to linger, resulting in packets being dropped. Solve this by invalidating unresolved neighbour entries for broadcast addresses after routes for these addresses are internally configured by the kernel. This allows the kernel to create a broadcast neighbour entry following the next route lookup. Another possible solution that is more generic but also more complex is to have the ARP code register a listener to the FIB notification chain and invalidate matching neighbour entries upon the addition of broadcast routes. It is also possible to wave off the issue as a user space problem, but it seems a bit excessive to expect user space to be that intimately familiar with the inner workings of the FIB/neighbour kernel code. [1] https://lore.kernel.org/netdev/55a04a8f-56f3-f73c-2aea-2195923f09d1@huawei.com/ Reported-by: Wang Hai Signed-off-by: Ido Schimmel Tested-by: Wang Hai Signed-off-by: David S. 
Miller --- include/net/arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index 031374ac2f22..d7ef4ec71dfe 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -65,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, -- cgit v1.2.3 From 696c65444120cfe98ae704e86a59df8666fabf1d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:53 +0800 Subject: ipv6: separate ndisc_ns_create() from ndisc_send_ns() This patch separate NS message allocation steps from ndisc_send_ns(), so it could be used in other places, like bonding, to allocate and send IPv6 NS message. Also export ndisc_send_skb() and ndisc_ns_create() for later bonding usage. Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/ndisc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 53cb8de0e589..aac3a42de432 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -447,10 +447,15 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce); +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr); + void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, -- cgit v1.2.3 From 841e95641e4cb695586b036f330d6d272fcd7173 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:55 +0800 Subject: bonding: add extra field for bond_opt_value Adding an extra storage field for bond_opt_value so we can set large bytes of data for bonding options in future, e.g. IPv6 address. Define a new call bond_opt_initextra(). Also change the checking order of __bond_opt_init() and check values first. Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/bond_options.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index dd75c071f67e..286b29c6c451 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -71,14 +71,18 @@ enum { /* This structure is used for storing option values and for passing option * values when changing an option. 
The logic when used as an arg is as follows: - * - if string != NULL -> parse it, if the opt is RAW type then return it, else - * return the parse result - * - if string == NULL -> parse value + * - if value != ULLONG_MAX -> parse value + * - if string != NULL -> parse string + * - if the opt is RAW data and length less than maxlen, + * copy the data to extra storage */ + +#define BOND_OPT_EXTRA_MAXLEN 16 struct bond_opt_value { char *string; u64 value; u32 flags; + char extra[BOND_OPT_EXTRA_MAXLEN]; }; struct bonding; @@ -118,17 +122,22 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); * When value is ULLONG_MAX then string will be used. */ static inline void __bond_opt_init(struct bond_opt_value *optval, - char *string, u64 value) + char *string, u64 value, + void *extra, size_t extra_len) { memset(optval, 0, sizeof(*optval)); optval->value = ULLONG_MAX; - if (value == ULLONG_MAX) - optval->string = string; - else + if (value != ULLONG_MAX) optval->value = value; + else if (string) + optval->string = string; + else if (extra_len <= BOND_OPT_EXTRA_MAXLEN) + memcpy(optval->extra, extra, extra_len); } -#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) -#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0) +#define bond_opt_initextra(optval, extra, extra_len) \ + __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) void bond_option_arp_ip_targets_clear(struct bonding *bond); -- cgit v1.2.3 From 4e24be018eb9dbcefa4b01c07e298b147dc1a4d7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:56 +0800 Subject: bonding: add new parameter ns_targets Add a new bonding parameter ns_targets to store IPv6 address. Add required bond_ns_send/rcv functions first before adding IPv6 address option setting. Add two functions bond_send/rcv_validate so we can send/recv ARP and NS at the same time. Signed-off-by: Hangbin Liu Signed-off-by: David S. 
Miller --- include/net/bonding.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index 7dead855a72d..f3b986f6b6e4 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -29,8 +29,11 @@ #include #include #include +#include +#include #define BOND_MAX_ARP_TARGETS 16 +#define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 @@ -146,6 +149,7 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; + struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -628,7 +632,7 @@ struct bond_net { struct class_attribute class_attr_bonding_masters; }; -int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); +int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); @@ -735,6 +739,19 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) return -1; } +static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) +{ + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) + if (ipv6_addr_equal(&targets[i], ip)) + return i; + else if (ipv6_addr_any(&targets[i])) + break; + + return -1; +} + /* exported from bond_main.c */ extern unsigned int bond_net_id; -- cgit v1.2.3 From 129e3c1bab24d27d0fa6e505a472345a92d7a2b0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:57 +0800 Subject: bonding: add new option ns_ip6_target This patch add a new bonding option ns_ip6_target, which correspond to the arp_ip_target. With this we set IPv6 targets and send IPv6 NS request to determine the health of the link. For other related options like the validation, we still use arp_validate, and will change to ns_validate later. Note: the sysfs configuration support was removed based on https://lore.kernel.org/netdev/8863.1645071997@famine Signed-off-by: Hangbin Liu Signed-off-by: David S. 
Miller --- include/net/bond_options.h | 4 ++++ include/net/bonding.h | 7 +++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 286b29c6c451..61b49063791c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -66,6 +66,7 @@ enum { BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, BOND_OPT_MISSED_MAX, + BOND_OPT_NS_TARGETS, BOND_OPT_LAST }; @@ -140,5 +141,8 @@ static inline void __bond_opt_init(struct bond_opt_value *optval, __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) void bond_option_arp_ip_targets_clear(struct bonding *bond); +#if IS_ENABLED(CONFIG_IPV6) +void bond_option_ns_ip6_targets_clear(struct bonding *bond); +#endif #endif /* _NET_BOND_OPTIONS_H */ diff --git a/include/net/bonding.h b/include/net/bonding.h index f3b986f6b6e4..d0dfe727e0b1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -503,6 +503,13 @@ static inline int bond_is_ip_target_ok(__be32 addr) return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } +static inline int bond_is_ip6_target_ok(struct in6_addr *addr) +{ + return !ipv6_addr_any(addr) && + !ipv6_addr_loopback(addr) && + !ipv6_addr_is_multicast(addr); +} + /* Get the oldest arp which we've received on this slave for bond's * arp_targets. */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6218f93f5c1a..e1ba2d51b717 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -860,6 +860,7 @@ enum { IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From c086df4902573e2f06c6a2a83452c13a8bc603f5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 18:52:52 -0500 Subject: fuse: move FUSE_SUPER_MAGIC definition to magic.h ...to help userland apps that need to identify FUSE mounts. Signed-off-by: Jeff Layton Signed-off-by: Miklos Szeredi --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 0425cd79af9a..f724129c0425 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -36,6 +36,7 @@ #define EFIVARFS_MAGIC 0xde5e81e4 #define HOSTFS_SUPER_MAGIC 0x00c0ffee #define OVERLAYFS_SUPER_MAGIC 0x794c7630 +#define FUSE_SUPER_MAGIC 0x65735546 #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ #define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ -- cgit v1.2.3 From 9c07f57869e90140080cfc282cc628d123e27704 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 2 Feb 2022 13:30:03 +0100 Subject: random: simplify entropy debiting Our pool is 256 bits, and we only ever use all of it or don't use it at all, which is decided by whether or not it has at least 128 bits in it. So we can drastically simplify the accounting and cmpxchg loop to do exactly this. While we're at it, we move the minimum bit size into a constant so it can be shared between the two places where it matters. The reason we want any of this is for the case in which an attacker has compromised the current state, and then bruteforces small amounts of entropy added to it. By demanding a particular minimum amount of entropy be present before reseeding, we make that bruteforcing difficult. 
Note that this rationale no longer includes anything about /dev/random blocking at the right moment, since /dev/random no longer blocks (except for at ~boot), but rather uses the crng. In a former life, /dev/random was different and therefore required a more nuanced account(), but this is no longer. Behaviorally, nothing changes here. This is just a simplification of the code. Cc: Theodore Ts'o Cc: Greg Kroah-Hartman Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- include/trace/events/random.h | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/trace/events/random.h b/include/trace/events/random.h index a2d9aa16a5d7..ad149aeaf42c 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -79,22 +79,6 @@ TRACE_EVENT(credit_entropy_bits, __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); -TRACE_EVENT(debit_entropy, - TP_PROTO(int debit_bits), - - TP_ARGS( debit_bits), - - TP_STRUCT__entry( - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->debit_bits = debit_bits; - ), - - TP_printk("input pool: debit_bits %d", __entry->debit_bits) -); - TRACE_EVENT(add_input_randomness, TP_PROTO(int input_bits), @@ -161,31 +145,29 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( __field( int, nbytes ) __field( int, entropy_count ) - __field(unsigned long, IP ) ), TP_fast_assign( __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; - __entry->IP = IP; ), - TP_printk("input pool: nbytes %d entropy_count %d caller %pS", - __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d", + __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, -- cgit v1.2.3 From 512865d83fd9685a4d5aab26f898737b57d3187e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:22 +0000 Subject: KVM: arm64: Bump guest PSCI version to 1.1 Expose PSCI version v1.1 to the guest by default. The only difference for now is that an updated version number is reported by PSCI_VERSION. 
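For reference, the value a guest now reads back decodes as follows (a sketch; PSCI_VERSION_MAJOR()/PSCI_VERSION_MINOR() are the existing decoding helpers from include/uapi/linux/psci.h):

	u32 ver = KVM_ARM_PSCI_1_1;	/* PSCI_VERSION(1, 1) == 0x10001 */

	pr_info("guest PSCI v%u.%u\n",
		PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver));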
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-2-will@kernel.org --- include/kvm/arm_psci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 5b58bd2fe088..4a1003323a0c 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -13,8 +13,9 @@ #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) #define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) +#define KVM_ARM_PSCI_1_1 PSCI_VERSION(1, 1) -#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_1 /* * We need the KVM pointer independently from the vcpu as we can call -- cgit v1.2.3 From d43583b890e7cb0078d13d056753a56602b92406 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:23 +0000 Subject: KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest PSCI v1.1 introduces the optional SYSTEM_RESET2 call, which allows the caller to provide a vendor-specific "reset type" and "cookie" to request a particular form of reset or shutdown. Expose this call to the guest and handle it in the same way as PSCI SYSTEM_RESET, along with some basic range checking on the type argument. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-3-will@kernel.org --- include/uapi/linux/psci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2fcad1dd0b0e..2bf93c0d6354 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -82,6 +82,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From f6c052afe6f802d87c74153b7a57c43b2e9faf07 Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Sun, 20 Feb 2022 15:14:31 +0000 Subject: nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property Wp-gpios property can be used on NVMEM nodes and the same property can be also used on MTD NAND nodes. In case of the wp-gpios property is defined at NAND level node, the GPIO management is done at NAND driver level. Write protect is disabled when the driver is probed or resumed and is enabled when the driver is released or suspended. When no partitions are defined in the NAND DT node, then the NAND DT node will be passed to NVMEM framework. If wp-gpios property is defined in this node, the GPIO resource is taken twice and the NAND controller driver fails to probe. It would be possible to set config->wp_gpio at MTD level before calling nvmem_register function but NVMEM framework will toggle this GPIO on each write when this GPIO should only be controlled at NAND level driver to ensure that the Write Protect has not been enabled. A way to fix this conflict is to add a new boolean flag in nvmem_config named ignore_wp. In case ignore_wp is set, the GPIO resource will be managed by the provider. 
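As a usage sketch, a provider that keeps the write-protect GPIO under its own control would register roughly like this (callback and variable names are hypothetical; only ignore_wp and the existing nvmem_config fields are real):

	struct nvmem_config config = {
		.dev		= dev,
		.name		= "nand-user-data",	/* illustrative */
		.ignore_wp	= true,		/* WP GPIO stays owned by the NAND driver */
		.reg_read	= nand_nvmem_read,	/* hypothetical callbacks */
		.reg_write	= nand_nvmem_write,
		.priv		= chip,
	};

	nvmem = devm_nvmem_register(dev, &config);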
Fixes: 2a127da461a9 ("nvmem: add support for the write-protect pin") Cc: stable@vger.kernel.org Signed-off-by: Christophe Kerello Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220220151432.16605-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 98efb7b5660d..c9a3ac9efeaa 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -70,7 +70,8 @@ struct nvmem_keepout { * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. * @priv: User context passed to read/write callbacks. - * @wp-gpio: Write protect pin + * @wp-gpio: Write protect pin + * @ignore_wp: Write Protect pin is managed by the provider. * * Note: A default "nvmem" name will be assigned to the device if * no name is specified in its configuration. In such case "" is @@ -92,6 +93,7 @@ struct nvmem_config { enum nvmem_type type; bool read_only; bool root_only; + bool ignore_wp; struct device_node *of_node; bool no_of_node; nvmem_reg_read_t reg_read; -- cgit v1.2.3 From 190fae468592bc2f0efc8b928920f8f712b5831e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 20 Feb 2022 15:15:15 +0000 Subject: nvmem: core: Remove unused devm_nvmem_unregister() There are no users and seems no will come of the devm_nvmem_unregister(). Remove the function and remove the unused devm_nvmem_match() along with it. Signed-off-by: Andy Shevchenko Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220220151527.17216-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 98efb7b5660d..99c01c43d7a8 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -133,8 +133,6 @@ void nvmem_unregister(struct nvmem_device *nvmem); struct nvmem_device *devm_nvmem_register(struct device *dev, const struct nvmem_config *cfg); -int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem); - void nvmem_add_cell_table(struct nvmem_cell_table *table); void nvmem_del_cell_table(struct nvmem_cell_table *table); @@ -153,12 +151,6 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c) return nvmem_register(c); } -static inline int -devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem) -{ - return -EOPNOTSUPP; -} - static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {} static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {} -- cgit v1.2.3 From a603ca60cebff8589882427a67f870ed946b3fc8 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Thu, 10 Feb 2022 16:42:03 -0800 Subject: serial: 8250_aspeed_vuart: add PORT_ASPEED_VUART port type Commit 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") fixed a bug that had, as a side-effect, prevented the 8250_aspeed_vuart driver from enabling the VUART's FIFOs. However, fixing that (and hence enabling the FIFOs) has in turn revealed what appears to be a hardware bug in the ASPEED VUART in which the host-side THRE bit doesn't get if the BMC-side receive FIFO trigger level is set to anything but one byte. 
This causes problems for polled-mode writes from the host -- for example, Linux kernel console writes proceed at a glacial pace (less than 100 bytes per second) because the write path waits for a 10ms timeout to expire after every character instead of being able to continue on to the next character upon seeing THRE asserted. (GRUB behaves similarly.) As a workaround, introduce a new port type for the ASPEED VUART that's identical to PORT_16550A as it had previously been using, but with UART_FCR_R_TRIG_00 instead to set the receive FIFO trigger level to one byte, which (experimentally) seems to avoid the problematic THRE behavior. Fixes: 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") Tested-by: Konstantin Aladyshev Reviewed-by: Andy Shevchenko Signed-off-by: Zev Weiss Link: https://lore.kernel.org/r/20220211004203.14915-1-zev@bewilderbeest.net Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c4042dcfdc0c..8885e69178bd 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -68,6 +68,9 @@ /* NVIDIA Tegra Combined UART */ #define PORT_TEGRA_TCU 41 +/* ASPEED AST2x00 virtual UART */ +#define PORT_ASPEED_VUART 42 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 -- cgit v1.2.3 From a1a5cfe70cd29a59a9a85290dfe95ed1c8df1193 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Mon, 14 Feb 2022 09:38:06 +0200 Subject: iio: introduce mag_referenced Some accelerometers that support activity and inactivity events also support a referenced mode, in which the gravitational acceleration is taken as a point of reference before comparing the acceleration to the specified activity and inactivity magnitude. For example, in the case of the ADXL367, for activity detection, the formula is: abs(acceleration - reference) > magnitude Add a new event type that makes this behavior clear. Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220214073810.781016-2-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 48c13147c0a8..472cead10d8d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -104,6 +104,7 @@ enum iio_event_type { IIO_EV_TYPE_THRESH_ADAPTIVE, IIO_EV_TYPE_MAG_ADAPTIVE, IIO_EV_TYPE_CHANGE, + IIO_EV_TYPE_MAG_REFERENCED, }; enum iio_event_direction { -- cgit v1.2.3 From 04ec96b768c9dd43946b047c3da60dcc66431370 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 14:43:25 +0100 Subject: random: make more consistent use of integer types We've been using a flurry of int, unsigned int, size_t, and ssize_t. Let's unify all of this into size_t where it makes sense, as it does in most places, and leave ssize_t for return values with possible errors. In addition, keeping with the convention of other functions in this file, functions that are dealing with raw bytes now take void * consistently instead of a mix of that and u8 *, because much of the time we're actually passing some other structure that is then interpreted as bytes by the function. We also take the opportunity to fix the outdated and incorrect comment in get_random_bytes_arch(). 
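A trivial caller shows the convention after the conversion (usage sketch only):

	u8 seed[32];

	get_random_bytes(seed, sizeof(seed));	/* sizeof() is already a size_t, no narrowing */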
Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- include/linux/hw_random.h | 2 +- include/linux/random.h | 10 +++--- include/trace/events/random.h | 79 +++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da21..1a9fc38f8938 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -61,6 +61,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); /** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); +extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51f..e92efb39779c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -20,8 +20,8 @@ struct random_ready_callback { struct module *owner; }; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +extern void add_device_randomness(const void *, size_t); +extern void add_bootloader_randomness(const void *, size_t); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -37,13 +37,13 @@ extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; -extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; @@ -87,7 +87,7 @@ static inline unsigned long get_random_canary(void) /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. 
*/ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index ad149aeaf42c..0609a2810a12 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -9,13 +9,13 @@ #include TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -23,18 +23,18 @@ TRACE_EVENT(add_device_randomness, __entry->IP = IP; ), - TP_printk("bytes %d caller %pS", + TP_printk("bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -42,12 +42,12 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes, __entry->IP = IP; ), - TP_printk("input pool: bytes %d caller %pS", + TP_printk("input pool: bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP) ); @@ -59,13 +59,13 @@ DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(int bits, int entropy_count, unsigned long IP), + TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( int, bits ) - __field( int, entropy_count ) + __field(size_t, bits ) + __field(size_t, entropy_count ) __field(unsigned long, IP ) ), @@ -75,34 +75,34 @@ TRACE_EVENT(credit_entropy_bits, __entry->IP = IP; ), - TP_printk("input pool: bits %d entropy_count %d caller %pS", + TP_printk("input pool: bits %zu entropy_count %zu caller %pS", __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), + TP_PROTO(size_t input_bits), TP_ARGS(input_bits), TP_STRUCT__entry( - __field( int, input_bits ) + __field(size_t, input_bits ) ), TP_fast_assign( __entry->input_bits = input_bits; ), - TP_printk("input_pool_bits %d", __entry->input_bits) + TP_printk("input_pool_bits %zu", __entry->input_bits) ); TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), + TP_PROTO(dev_t dev, size_t input_bits), TP_ARGS(dev, input_bits), TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) + __field(dev_t, dev ) + __field(size_t, input_bits ) ), TP_fast_assign( @@ -110,17 +110,17 @@ TRACE_EVENT(add_disk_randomness, __entry->input_bits = input_bits; ), - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), + TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->input_bits) ); DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP), TP_STRUCT__entry( - __field( int, nbytes ) + __field(size_t, nbytes ) __field(unsigned long, IP ) ), @@ -129,29 +129,29 @@ DECLARE_EVENT_CLASS(random__get_random_bytes, __entry->IP = IP; ), - TP_printk("nbytes %d 
caller %pS", __entry->nbytes, (void *)__entry->IP) + TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, nbytes ) - __field( int, entropy_count ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( @@ -159,37 +159,34 @@ DECLARE_EVENT_CLASS(random__extract_entropy, __entry->entropy_count = entropy_count; ), - TP_printk("input pool: nbytes %d entropy_count %d", + TP_printk("input pool: nbytes %zu entropy_count %zu", __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), + TP_PROTO(size_t nbytes, size_t entropy_count), - TP_ARGS(got_bits, pool_left, input_left), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; + __entry->nbytes = nbytes; + __entry->entropy_count = entropy_count; ), - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", __entry->got_bits, - __entry->pool_left, __entry->input_left) + TP_printk("reading: nbytes %zu entropy_count %zu", + __entry->nbytes, __entry->entropy_count) ); TRACE_EVENT(prandom_u32, -- cgit v1.2.3 From 14c174633f349cb41ea90c2c0aaddac157012f74 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:40:44 +0100 Subject: random: remove unused tracepoints These explicit tracepoints aren't really used and show sign of aging. It's work to keep these up to date, and before I attempted to keep them up to date, they weren't up to date, which indicates that they're not really used. These days there are better ways of introspecting anyway. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- include/trace/events/random.h | 212 ------------------------------------------ 1 file changed, 212 deletions(-) delete mode 100644 include/trace/events/random.h (limited to 'include') diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index 0609a2810a12..000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("input pool: bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), - - TP_ARGS(bits, entropy_count, IP), - - TP_STRUCT__entry( - __field(size_t, bits ) - __field(size_t, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bits = bits; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("input pool: bits %zu entropy_count %zu caller %pS", - __entry->bits, __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(size_t input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %zu", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, size_t input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field(size_t, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( 
size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("input pool: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("reading: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - -TRACE_EVENT(prandom_u32, - - TP_PROTO(unsigned int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( unsigned int, ret) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret=%u" , __entry->ret) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From 6071a6c0fba2d747742cadcbb3ba26ed756ed73b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:28:33 +0100 Subject: random: remove useless header comment This really adds nothing at all useful. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index e92efb39779c..37e1e8c43d7e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/linux/random.h - * - * Include file for the random number generator. - */ + #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H -- cgit v1.2.3 From b777c38239fec5a528e59f55b379e31b1a187524 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 16:17:01 +0100 Subject: random: pull add_hwgenerator_randomness() declaration into random.h add_hwgenerator_randomness() is a function implemented and documented inside of random.c. It is the way that hardware RNGs push data into it. Therefore, it should be declared in random.h. Otherwise sparse complains with: random.c:1137:6: warning: symbol 'add_hwgenerator_randomness' was not declared. Should it be static? The alternative would be to include hw_random.h into random.c, but that wouldn't really be good for anything except slowing down compile time. Cc: Matt Mackall Cc: Theodore Ts'o Acked-by: Herbert Xu Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- include/linux/hw_random.h | 2 -- include/linux/random.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 1a9fc38f8938..aa1d4da03538 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. 
*/ -extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index 37e1e8c43d7e..d7354de9351e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -32,6 +32,8 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -- cgit v1.2.3 From 3191dd5a1179ef0fad5a050a1702ae98b6251e8f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 22:48:04 +0100 Subject: random: clear fast pool, crng, and batches in cpuhp bring up For the irq randomness fast pool, rather than having to use expensive atomics, which were visibly the most expensive thing in the entire irq handler, simply take care of the extreme edge case of resetting count to zero in the cpuhp online handler, just after workqueues have been reenabled. This simplifies the code a bit and lets us use vanilla variables rather than atomics, and performance should be improved. As well, very early on when the CPU comes up, while interrupts are still disabled, we clear out the per-cpu crng and its batches, so that it always starts with fresh randomness. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Theodore Ts'o Cc: Sultan Alsawaf Cc: Dominik Brodowski Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. Donenfeld --- include/linux/cpuhotplug.h | 2 ++ include/linux/random.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..481e565cc5c4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,6 +100,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -240,6 +241,7 @@ enum cpuhp_state { CPUHP_AP_PERF_CSKY_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/random.h b/include/linux/random.h index d7354de9351e..6148b8d1ccf3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -156,4 +156,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +extern int random_prepare_cpu(unsigned int cpu); +extern int random_online_cpu(unsigned int cpu); +#endif + #endif /* _LINUX_RANDOM_H */ -- cgit v1.2.3 From 0fbb4d93b38bce1f8235aacfa37e90ad8f011473 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:32 -0500 Subject: dm: add dm_submit_bio_remap interface Where possible, switch from early bio-based IO accounting (at the time DM clones each incoming bio) to late IO accounting just before each remapped bio is issued to underlying device via submit_bio_noacct(). Allows more precise bio-based IO accounting for DM targets that use their own workqueues to perform additional processing of each bio in conjunction with their DM_MAPIO_SUBMITTED return from their map function. 
When a target is updated to use dm_submit_bio_remap() they must also set ti->accounts_remapped_io to true. Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each IO is only started once. The xchg race only happens if __send_duplicate_bios() sends multiple bios -- that case is reflected via tio->is_duplicate_bio. Given the niche nature of this race, it is best to avoid any xchg performance penalty for normal IO. For IO that was never submitted with dm_bio_submit_remap(), but the target completes the clone with bio_endio, accounting is started then ended and pending_io counter decremented. Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 7 +++++++ include/uapi/linux/dm-ioctl.h | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b26fecf6c8e8..7752d14d13f8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -362,6 +362,12 @@ struct dm_target { * zone append operations using regular writes. */ bool emulate_zone_append:1; + + /* + * Set if the target will submit IO using dm_submit_bio_remap() + * after returning DM_MAPIO_SUBMITTED from its map function. + */ + bool accounts_remapped_io:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); @@ -465,6 +471,7 @@ int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, bool from_wq); union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index c12ce30b52df..2e9550fef90f 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 45 +#define DM_VERSION_MINOR 46 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 93b71801a8274cd9511557faf04365a5de487197 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 09:06:54 -0500 Subject: KVM: PPC: reserve capability 210 for KVM_CAP_PPC_AIL_MODE_3 Add KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. 
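A rough usage sketch for such a target follows (hypothetical target code; only dm_submit_bio_remap(), dm_per_bio_data() and accounts_remapped_io come from this patch, and the flag requirement is spelled out below):

	static int my_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	{
		/* ... target setup ... */
		ti->accounts_remapped_io = true;	/* required with dm_submit_bio_remap() */
		return 0;
	}

	static int my_map(struct dm_target *ti, struct bio *bio)
	{
		struct my_io *io = dm_per_bio_data(bio, sizeof(*io));

		io->clone = bio;
		queue_work(my_wq, &io->work);		/* defer per-bio processing */
		return DM_MAPIO_SUBMITTED;
	}

	static void my_work_fn(struct work_struct *work)
	{
		struct my_io *io = container_of(work, struct my_io, work);

		/* ... additional processing ... */
		dm_submit_bio_remap(io->clone, NULL, true);	/* no separate target clone, from_wq */
	}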
Reviewed-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a8b9d116cda047f38ba29ce26df49c57479ffaa2 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 26 Jan 2022 05:18:26 -0800 Subject: dm: cleanup double word in comment Remove the second 'a'. Signed-off-by: Tom Rix Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 7752d14d13f8..70cd9449275a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -358,7 +358,7 @@ struct dm_target { bool limit_swap_bios:1; /* - * Set if this target implements a a zoned device and needs emulation of + * Set if this target implements a zoned device and needs emulation of * zone append operations using regular writes. */ bool emulate_zone_append:1; -- cgit v1.2.3 From e0891269a8c25715bd9510dc355326b00ab42db2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:26 +0000 Subject: linkage: add SYM_FUNC_ALIAS{,_LOCAL,_WEAK}() Currently aliasing an asm function requires adding START and END annotations for each name, as per Documentation/asm-annotations.rst: SYM_FUNC_START_ALIAS(__memset) SYM_FUNC_START(memset) ... asm insns ... SYM_FUNC_END(memset) SYM_FUNC_END_ALIAS(__memset) This is more painful than necessary to maintain, especially where a function has many aliases, some of which we may wish to define conditionally. For example, arm64's memcpy/memmove implementation (which uses some arch-specific SYM_*() helpers) has: SYM_FUNC_START_ALIAS(__memmove) SYM_FUNC_START_ALIAS_WEAK_PI(memmove) SYM_FUNC_START_ALIAS(__memcpy) SYM_FUNC_START_WEAK_PI(memcpy) ... asm insns ... SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_END_ALIAS_PI(memmove) EXPORT_SYMBOL(memmove) SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) SYM_FUNC_START(name) It would be much nicer if we could define the aliases *after* the standard function definition. This would avoid the need to specify each symbol name twice, and would make it easier to spot the canonical function definition. This patch adds new macros to allow us to do so, which allows the above example to be rewritten more succinctly as: SYM_FUNC_START(__pi_memcpy) ... asm insns ... SYM_FUNC_END(__pi_memcpy) SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) The reduction in duplication will also make it possible to replace some uses of WEAK with more accurate Kconfig guards, e.g. #ifndef CONFIG_KASAN SYM_FUNC_ALIAS(memmove, __memmove) EXPORT_SYMBOL(memmove) #endif ... which should make it easier to ensure that symbols are neither used nor overidden unexpectedly. 
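Userspace would probe the new capability in the usual way (illustrative snippet, not part of this kernel patch):

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_AIL_MODE_3) > 0)
		ail_mode_3_supported = true;	/* guest may then use H_SET_MODE with AIL=3 */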
The existing SYM_FUNC_START_ALIAS() and SYM_FUNC_START_LOCAL_ALIAS() are marked as deprecated, and will be removed once existing users are moved over to the new scheme. The tools/perf/ copy of linkage.h is updated to match. A subsequent patch will depend upon this when updating the x86 asm annotations. Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Slaby Cc: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- include/linux/linkage.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index dbf8506decca..e574a84d8b11 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -165,7 +165,18 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ - .size name, .-name + .set .L__sym_size_##name, .-name ASM_NL \ + .size name, .L__sym_size_##name +#endif + +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* === code annotations === */ @@ -275,6 +286,30 @@ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + /* SYM_CODE_START -- use for non-C (special) functions */ #ifndef SYM_CODE_START #define SYM_CODE_START(name) \ -- cgit v1.2.3 From be9aea74400433e03c2a8b0260fc9ffe2495f698 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:29 +0000 Subject: linkage: remove SYM_FUNC_{START,END}_ALIAS() Now that all aliases are defined using SYM_FUNC_ALIAS(), remove the old SYM_FUNC_{START,END}_ALIAS() macros. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Slaby Cc: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- include/linux/linkage.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index e574a84d8b11..acb1ad2356f1 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -211,30 +211,8 @@ SYM_ENTRY(name, linkage, SYM_A_NONE) #endif -/* - * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one - * function - */ -#ifndef SYM_FUNC_START_LOCAL_ALIAS -#define SYM_FUNC_START_LOCAL_ALIAS(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) -#endif - -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) -#endif - /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. - */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif @@ -247,7 +225,6 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif @@ -270,18 +247,11 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif -- cgit v1.2.3 From 6e031ec0e5a2dda53e12e0d2a7e9b15b47a3c502 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 24 Jan 2022 16:11:37 -0700 Subject: vfio/pci: Stub vfio_pci_vga_rw when !CONFIG_VFIO_PCI_VGA Resolve build errors reported against UML build for undefined ioport_map() and ioport_unmap() functions. Without this config option a device cannot have vfio_pci_core_device.has_vga set, so the existing function would always return -EINVAL anyway. 
Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220123125737.2658758-1-geert@linux-m68k.org Link: https://lore.kernel.org/r/164306582968.3758255.15192949639574660648.stgit@omen Signed-off-by: Alex Williamson --- include/linux/vfio_pci_core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index ef9a44b6cf5d..ae6f4838ab75 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -159,8 +159,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#ifdef CONFIG_VFIO_PCI_VGA extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#else +static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite) +{ + return -EINVAL; +} +#endif extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, uint64_t data, int count, int fd); -- cgit v1.2.3 From 1a03d3f13ffe5dd24142d6db629e72c11b704d99 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:04 +0100 Subject: fork: Move task stack accounting to do_exit() There is no need to perform the stack accounting of the outgoing task in its final schedule() invocation which happens with preemption disabled. The task is leaving, the resources will be freed and the accounting can happen in do_exit() before the actual schedule invocation which frees the stack memory. Move the accounting of the stack memory from release_task_stack() to exit_task_stack_account() which then can be invoked from do_exit(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de --- include/linux/sched/task_stack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d10150587d81..892562ebbd3a 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk) static inline void put_task_stack(struct task_struct *tsk) {} #endif +void exit_task_stack_account(struct task_struct *tsk); + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From f9b5e46f4097eb298f68e5b02f70697a90a44739 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:44 -0800 Subject: kasan: split kasan_*enabled() functions into a separate header In an upcoming commit we are going to need to call kasan_hw_tags_enabled() from arch/arm64/include/asm/mte.h. This would create a circular dependency between headers if KASAN_GENERIC or KASAN_SW_TAGS is enabled: linux/kasan.h -> linux/pgtable.h -> asm/pgtable.h -> asm/mte.h -> linux/kasan.h. Break the cycle by introducing a new header linux/kasan-enabled.h with the kasan_*enabled() functions that can be included from asm/mte.h. 
Link: https://linux-review.googlesource.com/id/I5b0d96c6ed0026fc790899e14d42b2fac6ab568e Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220219012945.894950-1-pcc@google.com Signed-off-by: Will Deacon --- include/linux/kasan-enabled.h | 33 +++++++++++++++++++++++++++++++++ include/linux/kasan.h | 23 +---------------------- 2 files changed, 34 insertions(+), 22 deletions(-) create mode 100644 include/linux/kasan-enabled.h (limited to 'include') diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 000000000000..4b6615375022 --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4a45562d8893..b6a93261c92a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include +#include #include #include #include @@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - static __always_inline void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { -- cgit v1.2.3 From 3f2e252ef727318f81588704461735617ad55b88 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 22 Feb 2022 20:50:59 +0800 Subject: scsi: libsas: Add sas_execute_ata_cmd() Add a function to execute an ATA command using the TMF code, and use in the hisi_sas driver. That driver needs to be able to issue the command on a specific phy, so add an interface for that. With that, hisi_sas_exec_internal_tmf_task() may be deleted. Link: https://lore.kernel.org/r/1645534259-27068-19-git-send-email-john.garry@huawei.com Tested-by: Yihang Li Tested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Martin K. 
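The intended consumer pattern looks roughly like this (the arm64 side shown here is a sketch, not part of this include/ change):

	/* arch/arm64/include/asm/mte.h -- illustrative only */
	#include <linux/kasan-enabled.h>	/* no longer needs the full linux/kasan.h */

	static inline bool mte_report_to_kasan(void)	/* hypothetical helper */
	{
		return kasan_hw_tags_enabled();
	}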
Petersen --- include/scsi/libsas.h | 7 +++---- include/scsi/sas_ata.h | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 4ea964d33600..dc529cc92d65 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -552,6 +552,9 @@ struct sas_ata_task { u8 stp_affil_pol:1; u8 device_control_reg_update:1; + + bool force_phy; + int force_phy_id; }; struct sas_smp_task { @@ -579,10 +582,6 @@ struct sas_ssp_task { struct sas_tmf_task { u8 tmf; u16 tag_of_task_to_be_managed; - - /* Temp */ - int force_phy; - int phy_id; }; struct sas_task { diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 21e7c10c6295..d47dea70855d 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -32,6 +32,8 @@ void sas_probe_sata(struct asd_sas_port *port); void sas_suspend_sata(struct asd_sas_port *port); void sas_resume_sata(struct asd_sas_port *port); void sas_ata_end_eh(struct ata_port *ap); +int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, + int force_phy_id); #else @@ -83,6 +85,12 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy static inline void sas_ata_end_eh(struct ata_port *ap) { } + +static inline int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, + int force_phy_id) +{ + return 0; +} #endif #endif /* _SAS_ATA_H_ */ -- cgit v1.2.3 From db22de3eb0352d2f8e7cda08f3fa65690e3fd64d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Feb 2022 11:50:53 -0800 Subject: scsi: iscsi: Stop using the SCSI pointer Instead of storing the iSCSI task pointer and the session age in the SCSI pointer, use command-private variables. This patch prepares for removal of the SCSI pointer from struct scsi_cmnd. The list of iSCSI drivers has been obtained as follows: $ git grep -lw iscsi_host_alloc drivers/infiniband/ulp/iser/iscsi_iser.c drivers/scsi/be2iscsi/be_main.c drivers/scsi/bnx2i/bnx2i_iscsi.c drivers/scsi/cxgbi/libcxgbi.c drivers/scsi/iscsi_tcp.c drivers/scsi/libiscsi.c drivers/scsi/qedi/qedi_main.c drivers/scsi/qla4xxx/ql4_os.c include/scsi/libiscsi.h Note: it is not clear to me how the qla4xxx driver can work without this patch since it uses the scsi_cmnd::SCp.ptr member for two different purposes: - The qla4xxx driver uses this member to store a struct srb pointer. - libiscsi uses this member to store a struct iscsi_task pointer. Reviewed-by: Lee Duncan Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Cc: Chris Leech Cc: Sagi Grimberg Cc: Nilesh Javali Cc: Manish Rangankar Cc: Karen Xie Cc: Ketan Mukadam Signed-off-by: Bart Van Assche iscsi Link: https://lore.kernel.org/r/20220218195117.25689-26-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/libiscsi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 4ee233e5a6ff..cb805ed9cbf1 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -19,6 +19,7 @@ #include #include #include +#include #include struct scsi_transport_template; @@ -152,6 +153,17 @@ static inline bool iscsi_task_is_completed(struct iscsi_task *task) task->state == ISCSI_TASK_ABRT_SESS_RECOV; } +/* Private data associated with struct scsi_cmnd. 
*/ +struct iscsi_cmd { + struct iscsi_task *task; + int age; +}; + +static inline struct iscsi_cmd *iscsi_cmd(struct scsi_cmnd *cmd) +{ + return scsi_cmd_priv(cmd); +} + /* Connection's states */ enum { ISCSI_CONN_INITIAL_STAGE, -- cgit v1.2.3 From 5d21aa3636fa8c7d3a2a69be93287da164054a3a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Feb 2022 11:50:55 -0800 Subject: scsi: libfc: Stop using the SCSI pointer Move the fc_fcp_pkt pointer, the residual length and the SCSI status into the new data structure libfc_cmd_priv. This patch prepares for removal of the SCSI pointer from struct scsi_cmnd. The user of the libfc data path functions have been identified as follows: $ git grep -lw fc_queuecommand | grep -v scsi/libfc/ drivers/scsi/fcoe/fcoe.c Link: https://lore.kernel.org/r/20220218195117.25689-28-bvanassche@acm.org Cc: Saurav Kashyap Cc: Javed Hasan Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/libfc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index eeb8d689ff6b..6e29e1719db1 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -351,6 +351,15 @@ struct fc_fcp_pkt { struct completion tm_done; } ____cacheline_aligned_in_smp; +/* + * @fsp should be tested and set under the scsi_pkt_queue lock + */ +struct libfc_cmd_priv { + struct fc_fcp_pkt *fsp; + u32 resid_len; + u8 status; +}; + /* * Structure and function definitions for managing Fibre Channel Exchanges * and Sequences -- cgit v1.2.3 From 8264aee803a2168124d1eb867a893047dc2977a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Feb 2022 11:51:17 -0800 Subject: scsi: core: Remove struct scsi_pointer from struct scsi_cmnd Remove struct scsi_pointer from struct scsi_cmnd since the previous patches removed all users of that member of struct scsi_cmnd. Additionally, reorder the members of struct scsi_cmnd such that the statement that the field below can be modified by the SCSI LLD is again correct. Link: https://lore.kernel.org/r/20220218195117.25689-50-bvanassche@acm.org Cc: Ming Lei Cc: Hannes Reinecke Cc: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index ff1c4b51f7ae..7a19c8bbaed9 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -123,11 +123,15 @@ struct scsi_cmnd { * command (auto-sense). Length must be * SCSI_SENSE_BUFFERSIZE bytes. */ + int flags; /* Command flags */ + unsigned long state; /* Command completion state */ + + unsigned int extra_len; /* length of alignment and padding */ + /* - * The following fields can be written to by the host specific code. - * Everything else should be left alone. + * The fields below can be modified by the LLD but the fields above + * must not be modified. */ - struct scsi_pointer SCp; /* Scratchpad used by some host adapters */ unsigned char *host_scribble; /* The host adapter is allowed to * call scsi_malloc and get some memory @@ -138,10 +142,6 @@ struct scsi_cmnd { * to be at an address < 16Mb). 
*/ int result; /* Status code from lower level driver */ - int flags; /* Command flags */ - unsigned long state; /* Command completion state */ - - unsigned int extra_len; /* length of alignment and padding */ }; /* Variant of blk_mq_rq_from_pdu() that verifies the type of its argument. */ -- cgit v1.2.3 From a773187e37fa5c3bcd92ffda9085707df34f903a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Feb 2022 09:28:27 +0100 Subject: scsi: dm: Remove WRITE_SAME support There are no more end-users of REQ_OP_WRITE_SAME left, so we can start deleting it. Link: https://lore.kernel.org/r/20220209082828.2629273-7-hch@lst.de Reviewed-by: Mike Snitzer Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/device-mapper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b26fecf6c8e8..721db99f4f63 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -316,12 +316,6 @@ struct dm_target { */ unsigned num_secure_erase_bios; - /* - * The number of WRITE SAME bios that will be submitted to the target. - * The bio number can be accessed with dm_bio_get_target_bio_nr. - */ - unsigned num_write_same_bios; - /* * The number of WRITE ZEROES bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. -- cgit v1.2.3 From 73bd66d9c834220579c881a3eb020fd8917075d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Feb 2022 09:28:28 +0100 Subject: scsi: block: Remove REQ_OP_WRITE_SAME support No more users of REQ_OP_WRITE_SAME or drivers implementing it are left, so remove the infrastructure. [mkp: fold in and tweak sysfs reporting fix] Link: https://lore.kernel.org/r/20220209082828.2629273-8-hch@lst.de Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- include/linux/bio.h | 3 --- include/linux/blk_types.h | 2 -- include/linux/blkdev.h | 19 ------------------- 3 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 117d7f248ac9..eb402afa370a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -65,7 +65,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || - bio_op(bio) == REQ_OP_WRITE_SAME || bio_op(bio) == REQ_OP_WRITE_ZEROES; } @@ -186,8 +185,6 @@ static inline unsigned bio_segments(struct bio *bio) case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; - case REQ_OP_WRITE_SAME: - return 1; default: break; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff..077adf4b6e73 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -354,8 +354,6 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* write the same sector many times */ - REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..b10470d5e986 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -97,7 +97,6 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; - unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -651,9 +650,6 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); - if (unlikely(op == REQ_OP_WRITE_SAME)) - return q->limits.max_write_same_sectors; - if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; @@ -696,8 +692,6 @@ extern void blk_queue_max_discard_segments(struct request_queue *, extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); -extern void blk_queue_max_write_same_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); @@ -842,9 +836,6 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); -extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, struct page *page); - #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, @@ -1071,16 +1062,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev) return q->limits.discard_alignment; } -static inline unsigned int bdev_write_same(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_same_sectors; - - return 0; -} - static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3 From 763087dab97547230a6807c865a6a5ae53a59247 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Feb 2022 19:21:12 -0800 Subject: net: add skb_set_end_offset() 
helper We have multiple places where this helper is convenient, and plan using it in the following patch. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a3e90efe6586..115be7f73487 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1536,6 +1536,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1546,6 +1551,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ -- cgit v1.2.3 From 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Feb 2022 19:21:13 -0800 Subject: net: preserve skb_end_offset() in skb_unclone_keeptruesize() syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len) in skb_try_coalesce() [1] I was able to root cause the issue to kfence. When kfence is in action, the following assertion is no longer true: int size = xxxx; void *ptr1 = kmalloc(size, gfp); void *ptr2 = kmalloc(size, gfp); if (ptr1 && ptr2) ASSERT(ksize(ptr1) == ksize(ptr2)); We attempted to fix these issues in the blamed commits, but forgot that TCP was possibly shifting data after skb_unclone_keeptruesize() has been used, notably from tcp_retrans_try_collapse(). So we not only need to keep same skb->truesize value, we also need to make sure TCP wont fill new tailroom that pskb_expand_head() was able to get from a addr = kmalloc(...) followed by ksize(addr) Split skb_unclone_keeptruesize() into two parts: 1) Inline skb_unclone_keeptruesize() for the common case, when skb is not cloned. 2) Out of line __skb_unclone_keeptruesize() for the 'slow path'. 
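For readers following along, this is roughly what the new out-of-line slow path has to guarantee. The function below is an illustrative sketch only (the real __skb_unclone_keeptruesize() lives in net/core/skbuff.c, outside this 'include'-only view), not the committed implementation: after pskb_expand_head() the new head's ksize() can be larger than the old end offset, so both skb->truesize and skb_end_offset() are forced back to their saved values and the shared info area is moved accordingly. The syzkaller report that motivated the change follows.

  #include <linux/skbuff.h>
  #include <linux/string.h>

  static int sketch_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
  {
  	unsigned int saved_end = skb_end_offset(skb);
  	unsigned int saved_truesize = skb->truesize;
  	struct skb_shared_info *shinfo;
  	int res;

  	res = pskb_expand_head(skb, 0, 0, pri);
  	if (res)
  		return res;

  	skb->truesize = saved_truesize;
  	if (skb_end_offset(skb) == saved_end)
  		return 0;

  	/* The new head is larger than the old one (ksize() is not stable
  	 * across allocations when kfence and friends are in play): put the
  	 * shared info back at the saved offset and hide the extra tailroom
  	 * so later users (e.g. TCP collapsing) cannot grow into it.
  	 */
  	shinfo = skb_shinfo(skb);
  	memmove(skb->head + saved_end, shinfo,
  		offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
  	skb_set_end_offset(skb, saved_end);
  	return 0;
  }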
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295 Modules linked in: CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295 Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa RSP: 0018:ffffc900063af268 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000 RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003 RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000 R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8 R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155 FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline] tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630 tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914 tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025 tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947 tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719 sk_backlog_rcv include/net/sock.h:1037 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2779 release_sock+0x54/0x1b0 net/core/sock.c:3311 sk_wait_data+0x177/0x450 net/core/sock.c:2821 tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457 tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572 inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] sock_recvmsg net/socket.c:962 [inline] ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632 ___sys_recvmsg+0x127/0x200 net/socket.c:2674 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper") Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Marco Elver Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 115be7f73487..31be38078918 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1795,19 +1795,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. 
+ */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } -- cgit v1.2.3 From d0b9d6dcaa5ac480c272683919f387cc6d82b638 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Sep 2021 20:42:44 +0200 Subject: sched/headers: Fix header to build standalone: Uses various kernel types that don't build standalone. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/sched_clock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 835ee87ed792..cb41c5edb4d4 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -5,6 +5,8 @@ #ifndef LINUX_SCHED_CLOCK #define LINUX_SCHED_CLOCK +#include + #ifdef CONFIG_GENERIC_SCHED_CLOCK /** * struct clock_read_data - data required to read from sched_clock() -- cgit v1.2.3 From 669f45f19cf7feea7066dce86a0ce89dfc8a2065 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Feb 2022 13:23:59 +0100 Subject: sched/headers: Add initial new headers as identity mappings This allows code sharing between fast-headers tree and the vanilla scheduler tree. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/cgroup_api.h | 1 + include/linux/cpumask_api.h | 1 + include/linux/fs_api.h | 1 + include/linux/gfp_api.h | 1 + include/linux/hashtable_api.h | 1 + include/linux/hrtimer_api.h | 1 + include/linux/kobject_api.h | 1 + include/linux/kref_api.h | 1 + include/linux/ktime_api.h | 1 + include/linux/llist_api.h | 1 + include/linux/lockdep_api.h | 1 + include/linux/mm_api.h | 1 + include/linux/mutex_api.h | 1 + include/linux/perf_event_api.h | 1 + include/linux/pgtable_api.h | 1 + include/linux/ptrace_api.h | 1 + include/linux/rcuwait_api.h | 1 + include/linux/refcount_api.h | 1 + include/linux/sched/affinity.h | 1 + include/linux/sched/cond_resched.h | 1 + include/linux/sched/posix-timers.h | 1 + include/linux/sched/rseq_api.h | 1 + include/linux/sched/task_flags.h | 1 + include/linux/sched/thread_info_api.h | 1 + include/linux/seqlock_api.h | 1 + include/linux/softirq.h | 1 + include/linux/spinlock_api.h | 1 + include/linux/swait_api.h | 1 + include/linux/syscalls_api.h | 1 + include/linux/u64_stats_sync_api.h | 1 + include/linux/wait_api.h | 1 + include/linux/workqueue_api.h | 1 + 32 files changed, 32 insertions(+) create mode 100644 include/linux/cgroup_api.h create mode 100644 include/linux/cpumask_api.h create mode 100644 include/linux/fs_api.h create mode 100644 include/linux/gfp_api.h create mode 100644 include/linux/hashtable_api.h create mode 100644 include/linux/hrtimer_api.h create mode 100644 include/linux/kobject_api.h create mode 100644 include/linux/kref_api.h create mode 100644 include/linux/ktime_api.h create mode 100644 include/linux/llist_api.h create mode 100644 include/linux/lockdep_api.h create mode 100644 include/linux/mm_api.h create mode 100644 include/linux/mutex_api.h create mode 100644 include/linux/perf_event_api.h create mode 100644 include/linux/pgtable_api.h create mode 100644 include/linux/ptrace_api.h create mode 100644 include/linux/rcuwait_api.h create mode 100644 include/linux/refcount_api.h create mode 100644 
include/linux/sched/affinity.h create mode 100644 include/linux/sched/cond_resched.h create mode 100644 include/linux/sched/posix-timers.h create mode 100644 include/linux/sched/rseq_api.h create mode 100644 include/linux/sched/task_flags.h create mode 100644 include/linux/sched/thread_info_api.h create mode 100644 include/linux/seqlock_api.h create mode 100644 include/linux/softirq.h create mode 100644 include/linux/spinlock_api.h create mode 100644 include/linux/swait_api.h create mode 100644 include/linux/syscalls_api.h create mode 100644 include/linux/u64_stats_sync_api.h create mode 100644 include/linux/wait_api.h create mode 100644 include/linux/workqueue_api.h (limited to 'include') diff --git a/include/linux/cgroup_api.h b/include/linux/cgroup_api.h new file mode 100644 index 000000000000..d0cfe8025111 --- /dev/null +++ b/include/linux/cgroup_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/cpumask_api.h b/include/linux/cpumask_api.h new file mode 100644 index 000000000000..83bd3ebe82b0 --- /dev/null +++ b/include/linux/cpumask_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/fs_api.h b/include/linux/fs_api.h new file mode 100644 index 000000000000..83be38d6d413 --- /dev/null +++ b/include/linux/fs_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/gfp_api.h b/include/linux/gfp_api.h new file mode 100644 index 000000000000..5a05a2764a86 --- /dev/null +++ b/include/linux/gfp_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/hashtable_api.h b/include/linux/hashtable_api.h new file mode 100644 index 000000000000..c268ac2c5c0e --- /dev/null +++ b/include/linux/hashtable_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/hrtimer_api.h b/include/linux/hrtimer_api.h new file mode 100644 index 000000000000..8d9700894468 --- /dev/null +++ b/include/linux/hrtimer_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/kobject_api.h b/include/linux/kobject_api.h new file mode 100644 index 000000000000..6e36a054c2d6 --- /dev/null +++ b/include/linux/kobject_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/kref_api.h b/include/linux/kref_api.h new file mode 100644 index 000000000000..d67e554721d2 --- /dev/null +++ b/include/linux/kref_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/ktime_api.h b/include/linux/ktime_api.h new file mode 100644 index 000000000000..f697d493960f --- /dev/null +++ b/include/linux/ktime_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/llist_api.h b/include/linux/llist_api.h new file mode 100644 index 000000000000..625bec0393a1 --- /dev/null +++ b/include/linux/llist_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/lockdep_api.h b/include/linux/lockdep_api.h new file mode 100644 index 000000000000..907e66979ab2 --- /dev/null +++ b/include/linux/lockdep_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/mm_api.h b/include/linux/mm_api.h new file mode 100644 index 000000000000..a5ace2b198b8 --- /dev/null +++ b/include/linux/mm_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/mutex_api.h b/include/linux/mutex_api.h new file mode 100644 index 000000000000..85ab9491e13e --- /dev/null +++ b/include/linux/mutex_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/perf_event_api.h b/include/linux/perf_event_api.h new file mode 100644 index 000000000000..c2fd6048b790 --- /dev/null +++ b/include/linux/perf_event_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/pgtable_api.h b/include/linux/pgtable_api.h new file mode 100644 index 000000000000..ff367a4ba8c4 --- /dev/null +++ 
b/include/linux/pgtable_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/ptrace_api.h b/include/linux/ptrace_api.h new file mode 100644 index 000000000000..26e7d275ad8d --- /dev/null +++ b/include/linux/ptrace_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/rcuwait_api.h b/include/linux/rcuwait_api.h new file mode 100644 index 000000000000..f962e28544dd --- /dev/null +++ b/include/linux/rcuwait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/refcount_api.h b/include/linux/refcount_api.h new file mode 100644 index 000000000000..5f032589f568 --- /dev/null +++ b/include/linux/refcount_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/affinity.h b/include/linux/sched/affinity.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/affinity.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/cond_resched.h b/include/linux/sched/cond_resched.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/cond_resched.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/posix-timers.h b/include/linux/sched/posix-timers.h new file mode 100644 index 000000000000..523a381d6c88 --- /dev/null +++ b/include/linux/sched/posix-timers.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/rseq_api.h b/include/linux/sched/rseq_api.h new file mode 100644 index 000000000000..cf2af72693e1 --- /dev/null +++ b/include/linux/sched/rseq_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/task_flags.h b/include/linux/sched/task_flags.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/task_flags.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/thread_info_api.h b/include/linux/sched/thread_info_api.h new file mode 100644 index 000000000000..2c60fbc16c08 --- /dev/null +++ b/include/linux/sched/thread_info_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/seqlock_api.h b/include/linux/seqlock_api.h new file mode 100644 index 000000000000..be91e7d3b826 --- /dev/null +++ b/include/linux/seqlock_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/softirq.h b/include/linux/softirq.h new file mode 100644 index 000000000000..c73d7dcb4cb5 --- /dev/null +++ b/include/linux/softirq.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/spinlock_api.h b/include/linux/spinlock_api.h new file mode 100644 index 000000000000..6338b27f98df --- /dev/null +++ b/include/linux/spinlock_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/swait_api.h b/include/linux/swait_api.h new file mode 100644 index 000000000000..1eeaaaaa5ea7 --- /dev/null +++ b/include/linux/swait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/syscalls_api.h b/include/linux/syscalls_api.h new file mode 100644 index 000000000000..23e012b04db4 --- /dev/null +++ b/include/linux/syscalls_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/u64_stats_sync_api.h b/include/linux/u64_stats_sync_api.h new file mode 100644 index 000000000000..c72ca63da44b --- /dev/null +++ b/include/linux/u64_stats_sync_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/wait_api.h b/include/linux/wait_api.h new file mode 100644 index 000000000000..4e930548935a --- /dev/null +++ b/include/linux/wait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/workqueue_api.h b/include/linux/workqueue_api.h new file mode 100644 index 000000000000..77debb5d2760 --- /dev/null +++ b/include/linux/workqueue_api.h @@ -0,0 +1 @@ +#include -- cgit v1.2.3 From 
fbed5664b73830dcb1a19af4f7f1f1b424f54609 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Feb 2022 13:28:20 +0100 Subject: sched/headers: Make the header build standalone This header depends on various scheduler definitions. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/sched/deadline.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1aff00b65f3c..7c83d4d5a971 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -6,6 +6,8 @@ * NORMAL/BATCH tasks. */ +#include + #define MAX_DL_PRIO 0 static inline int dl_prio(int prio) -- cgit v1.2.3 From b26ef81c46ed15d11ddddba9ba1cd52c749385ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Feb 2022 14:04:50 -0800 Subject: drop_monitor: remove quadratic behavior drop_monitor is using an unique list on which all netdevices in the host have an element, regardless of their netns. This scales poorly, not only at device unregister time (what I caught during my netns dismantle stress tests), but also at packet processing time whenever trace_napi_poll_hit() is called. If the intent was to avoid adding one pointer in 'struct net_device' then surely we prefer O(1) behavior. Signed-off-by: Eric Dumazet Cc: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93fc680b658f..c79ee2296296 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2236,7 +2236,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; #endif - +#if IS_ENABLED(CONFIG_NET_DROP_MONITOR) + struct dm_hw_stat_delta __rcu *dm_private; +#endif struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; -- cgit v1.2.3 From a21d9a670d81103db7f788de1a4a4a6e4b891a0b Mon Sep 17 00:00:00 2001 From: Hans Schultz Date: Wed, 23 Feb 2022 11:16:46 +0100 Subject: net: bridge: Add support for bridge port in locked mode In a 802.1X scenario, clients connected to a bridge port shall not be allowed to have traffic forwarded until fully authenticated. A static fdb entry of the clients MAC address for the bridge port unlocks the client and allows bidirectional communication. This scenario is facilitated with setting the bridge port in locked mode, which is also supported by various switchcore chipsets. Signed-off-by: Hans Schultz Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 509e18c7e740..3aae023a9353 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -58,6 +58,7 @@ struct br_ip_list { #define BR_MRP_LOST_CONT BIT(18) #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) +#define BR_PORT_LOCKED BIT(21) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e1ba2d51b717..be09d2ad4b5d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -537,6 +537,7 @@ enum { IFLA_BRPORT_MRP_IN_OPEN, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From c2700d2886a87f83f31e0a301de1d2350b52c79b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 22 Jan 2022 22:27:44 +0530 Subject: nvme-tcp: send H2CData PDUs based on MAXH2CDATA As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3) Maximum Host to Controller Data length (MAXH2CDATA): Specifies the maximum number of PDU-Data bytes per H2CData PDU in bytes. This value is a multiple of dwords and should be no less than 4,096. Current code sets H2CData PDU data_length to r2t_length, it does not check MAXH2CDATA value. Fix this by setting H2CData PDU data_length to min(req->h2cdata_left, queue->maxh2cdata). Also validate MAXH2CDATA value returned by target in ICResp PDU, if it is not a multiple of dword or if it is less than 4096 return -EINVAL from nvme_tcp_init_connection(). Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 959e0bd9a913..75470159a194 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -12,6 +12,7 @@ #define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 +#define NVME_TCP_MIN_MAXH2CDATA 4096 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, -- cgit v1.2.3 From f2eb478f2f322217aa642e11c1cc011f99c797e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Feb 2022 08:07:13 +0100 Subject: kernfs: move struct kernfs_root out of the public view. There is no need to have struct kernfs_root be part of kernfs.h for the whole kernel to see and poke around it. Move it internal to kernfs code and provide a helper function, kernfs_root_to_node(), to handle the one field that kernfs users were directly accessing from the structure. 
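In practice the conversion is mechanical for callers; a minimal sketch (the surrounding caller is hypothetical, only the accessor itself comes from this patch):

  /* before: poking into the struct, which now requires kernfs-private headers */
  struct kernfs_node *kn = root->kn;

  /* after: the struct stays opaque, the accessor returns the root node */
  struct kernfs_node *kn = kernfs_root_to_node(root);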
Cc: Imran Khan Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220222070713.3517679-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 861c4f0f8a29..62aff082dc3f 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -185,6 +185,7 @@ struct kernfs_syscall_ops { struct kernfs_root *root); }; +#if 0 struct kernfs_root { /* published fields */ struct kernfs_node *kn; @@ -202,6 +203,9 @@ struct kernfs_root { wait_queue_head_t deactivate_waitq; struct rw_semaphore kernfs_rwsem; }; +#endif + +struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root); struct kernfs_open_file { /* published fields */ -- cgit v1.2.3 From c404c64d64bc31bebe8a2015103671f7cd282731 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:06 +0100 Subject: powercap/dtpm: Destroy hierarchy function The hierarchy creation function exits but without a destroy hierarchy function. Due to that, the modules creating the hierarchy can not be unloaded properly because they don't have an exit callback. Provide the dtpm_destroy_hierarchy() function to remove the previously created hierarchy. The function relies on all the release mechanisms implemented by the underlying powercap framework. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-4-daniel.lezcano@linaro.org --- include/linux/dtpm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index f7a25c70dd4c..a4a13514b730 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -37,6 +37,7 @@ struct device_node; struct dtpm_subsys_ops { const char *name; int (*init)(void); + void (*exit)(void); int (*setup)(struct dtpm *, struct device_node *); }; @@ -67,4 +68,6 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); + +void dtpm_destroy_hierarchy(void); #endif -- cgit v1.2.3 From 43c075959de3c45608636d9d80ff9e61d166fb21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 26 Jan 2022 10:57:07 -0800 Subject: mlx5: remove unused static inlines mlx5 has some unused static inline helpers in include/ while at it also clean static inlines in the driver itself. 
Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 78655d8d13a7..1b398c9e17b9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -863,20 +863,10 @@ struct mlx5_hca_vport_context { bool grh_required; }; -static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset) -{ - return buf->frags->buf + offset; -} - #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) -{ - return pci_get_drvdata(pdev); -} - extern struct dentry *mlx5_debugfs_root; static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) -- cgit v1.2.3 From c2c922dae77f36e24d246c6e310cee0c61afc6fb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 29 Nov 2021 16:24:28 +0200 Subject: net/mlx5: Add ability to insert to specific flow group If the flow table isn't an autogroup the upper driver has to create the flow groups explicitly. This information can't later be used when creating rules to insert into a specific flow group. Allow such use case. Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b1aad14689e3..e3bfed68b08a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -224,6 +224,7 @@ struct mlx5_flow_act { u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; + struct mlx5_flow_group *fg; }; #define MLX5_DECLARE_FLOW_ACT(name) \ -- cgit v1.2.3 From 605bef0015b163867127202b821dce79804d603d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 5 Apr 2020 17:50:25 -0700 Subject: net/mlx5: cmdif, cmd_check refactoring Do not mangle the command outbox in the internal low level cmd_exec and cmd_invoke functions. Instead return a proper unique error code and move the driver error checking to be at a higher level in mlx5_cmd_exec(). Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1b398c9e17b9..8a8408708e6c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -981,7 +981,6 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); -- cgit v1.2.3 From f23519e542e51c19ab3081deb089bb3f8fec7bb9 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 17 Aug 2019 03:05:10 -0700 Subject: net/mlx5: cmdif, Add new api for command execution Add mlx5_cmd_do. Unlike mlx5_cmd_exec, this function will not modify or translate outbox.status. The function will return: return = 0: Command was executed, outbox.status == MLX5_CMD_STAT_OK. return = -EREMOTEIO: Executed, outbox.status != MLX5_CMD_STAT_OK. 
return < 0: Command execution couldn't be performed by FW or driver. And document other mlx5_cmd_exec functions. Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8a8408708e6c..1b9bec8fa870 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -964,6 +964,8 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work); +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -- cgit v1.2.3 From 31803e59233efc838b9dcb26edea28a4b2389e97 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 30 Mar 2020 21:12:58 -0700 Subject: net/mlx5: Use mlx5_cmd_do() in core create_{cq,dct} mlx5_core_create_{cq/dct} functions are non-trivial mlx5 commands functions. They check command execution status themselves and hide valuable FW failure information. For mlx5_core/eth kernel user this is what we actually want, but for a devx/rdma user the hidden information is essential and should be propagated up to the caller, thus we convert these commands to use mlx5_cmd_do to return the FW/driver and command outbox status as is, and let the caller decide what to do with it. For kernel callers of mlx5_core_create_{cq/dct} or those who only care about the binary status (FAIL/SUCCESS) they must check status themselves via mlx5_cmd_check() to restore the current behavior. err = mlx5_create_cq(in, out) err = mlx5_cmd_check(err, in, out) if (err) // handle err For DEVX users and those who care about full visibility, They will just propagate the error to user space, and app can check if err == -EREMOTEIO, then outbox.{status,syndrome} are valid. API Note: mlx5_cmd_check() must be used by kernel users since it allows the driver to intercept the command execution status and return a driver simulated status in case of driver induced error handling or reset/recovery flows. Signed-off-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/cq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7bfb67363434..cb15308b5cb0 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,8 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From 0a41527608e7f3da61e76564f5a8749a1fddc7f1 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 31 Mar 2020 22:03:00 -0700 Subject: net/mlx5: cmdif, Refactor error handling and reporting of async commands Same as the new mlx5_cmd_do API, report all information to callers and let them handle the error values and outbox parsing. 
The user callback status "work->user_callback(status)" is now similar to the error rc code returned from the blocking mlx5_cmd_do() version, and now is defined as follows: -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. Caller must check FW outbox status. 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. < 0 : Command couldn't execute, FW or driver induced error. Signed-off-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1b9bec8fa870..432151d7d0bf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -955,6 +955,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; + void *out; /* pointer to the cmd output buffer */ }; void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, @@ -963,7 +964,7 @@ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work); - +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out); int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, -- cgit v1.2.3 From 72fb3b60a3114a1154a8ae5629ea3b43a88a7a4d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 14 Aug 2021 17:57:51 +0300 Subject: net/mlx5: Add reset_state field to MFRL register Add new field reset_state to MFRL register. This field expose current state of sync reset for fw update. This field enables sharing with the user more details on why fw activate failed in case it failed the sync reset stage. 
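To make this concrete, a rough sketch of how a driver could read the new field through the usual access-register flow. The helper name below is made up; MLX5_REG_MFRL, mlx5_core_access_reg() and the MLX5_GET()/MLX5_ST_SZ_DW() accessors are assumed to be the pre-existing mlx5 infrastructure, and mfrl_reg is the layout extended by this patch.

  #include <linux/mlx5/driver.h>
  #include <linux/mlx5/mlx5_ifc.h>

  static int sketch_query_reset_state(struct mlx5_core_dev *dev, u8 *state)
  {
  	u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
  	u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
  	int err;

  	/* read MFRL: arg = 0, write = 0 */
  	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
  				   MLX5_REG_MFRL, 0, 0);
  	if (err)
  		return err;

  	/* e.g. MLX5_MFRL_REG_RESET_STATE_TIMEOUT when sync reset timed out */
  	*state = MLX5_GET(mfrl_reg, out, reset_state);
  	return 0;
  }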
Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..8ca2d65ff789 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9694,7 +9694,8 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x6b]; + u8 reserved_at_0[0x6a]; + u8 reset_state[0x1]; u8 ptpcyc2realtime_modify[0x1]; u8 reserved_at_6c[0x2]; u8 pci_status_and_power[0x1]; @@ -10375,6 +10376,14 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[][0x20]; }; +enum { + MLX5_MFRL_REG_RESET_STATE_IDLE = 0, + MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, + MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, + MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3, + MLX5_MFRL_REG_RESET_STATE_NACK = 4, +}; + enum { MLX5_MFRL_REG_RESET_TYPE_FULL_CHIP = BIT(0), MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE = BIT(1), @@ -10393,7 +10402,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x8]; + u8 reserved_at_28[0x4]; + u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; }; -- cgit v1.2.3 From 45fee8edb4b333af79efad7a99de51718ebda94b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 6 Sep 2021 11:02:44 +0300 Subject: net/mlx5: Add clarification on sync reset failure In case devlink reload action fw_activate failed in sync reset stage, use the new MFRL field reset_state to find why it failed and share this clarification with the user. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 432151d7d0bf..d3b1a6a1f8d2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1031,6 +1031,9 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -- cgit v1.2.3 From 1241e329ce2e1f5b1039fd356b75867b29721ad2 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 23 Feb 2022 00:09:12 +0530 Subject: ethtool: add support to set/get completion queue event size Add support to set completion queue event size via ethtool -G parameter and get it via ethtool -g parameter. 
~ # ./ethtool -G eth0 cqe-size 512 ~ # ./ethtool -g eth0 Ring parameters for eth0: Pre-set maximums: RX: 1048576 RX Mini: n/a RX Jumbo: n/a TX: 1048576 Current hardware settings: RX: 256 RX Mini: n/a RX Jumbo: n/a TX: 4096 RX Buf Len: 2048 CQE Size: 128 Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++++ include/uapi/linux/ethtool_netlink.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e0853f48b75e..4af58459a1e7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,18 +71,22 @@ enum { * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers + * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u32 cqe_size; }; /** * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 417d4280d7b5..979850221b8d 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -337,6 +337,7 @@ enum { ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 5597f082fcaf17e2d9adee7a1f6fa02f5872f9d5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 13 Jan 2022 15:34:07 +0100 Subject: can: bittiming: mark function arguments and local variables as const This patch marks the arguments of some functions as well as some local variables as constant. 
Link: https://lore.kernel.org/all/20220124215642.3474154-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index a81652d1c6f3..7ae21c0f7f23 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -113,7 +113,7 @@ struct can_tdc_const { }; #ifdef CONFIG_CAN_CALC_BITTIMING -int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, @@ -121,7 +121,7 @@ void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int -can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc) { netdev_err(dev, "bit-timing calculation not available\n"); @@ -136,7 +136,7 @@ can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, } #endif /* CONFIG_CAN_CALC_BITTIMING */ -int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, const u32 *bitrate_const, const unsigned int bitrate_const_cnt); -- cgit v1.2.3 From 05f2281b4192320a20d746df6146b3dd82f96e39 Mon Sep 17 00:00:00 2001 From: Ricardo Rivera-Matos Date: Mon, 14 Feb 2022 18:07:56 -0600 Subject: power: supply: Introduces bypass charging property Adds a POWER_SUPPLY_CHARGE_TYPE_BYPASS option to the POWER_SUPPLY_PROP_CHARGE_TYPE property to facilitate bypass charging operation. In bypass charging operation, the charger bypasses the charging path around the integrated converter allowing for a "smart" wall adaptor to perform the power conversion externally. This operational mode is critical for the USB PPS standard of power adaptors and is becoming a common feature in modern charging ICs such as: - BQ25980 - BQ25975 - BQ25960 - LN8000 - LN8410 Signed-off-by: Ricardo Rivera-Matos Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 006111917d1a..c135196aa9d1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -49,6 +49,7 @@ enum { POWER_SUPPLY_CHARGE_TYPE_ADAPTIVE, /* dynamically adjusted speed */ POWER_SUPPLY_CHARGE_TYPE_CUSTOM, /* use CHARGE_CONTROL_* props */ POWER_SUPPLY_CHARGE_TYPE_LONGLIFE, /* slow speed, longer life */ + POWER_SUPPLY_CHARGE_TYPE_BYPASS, /* bypassing the charger */ }; enum { -- cgit v1.2.3 From a3a59919ab662682a2cb77b25b0f7b9e6c78737e Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Mon, 21 Feb 2022 10:50:27 +0100 Subject: dt-bindings: clock: r9a06g032: Add the definition of the watchdog clock This clock is actually the REF_SYNC_D8 clock. 
Signed-off-by: Jean-Jacques Hiblot Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220221095032.95054-2-jjhiblot@traphandler.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r9a06g032-sysctrl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/r9a06g032-sysctrl.h b/include/dt-bindings/clock/r9a06g032-sysctrl.h index 90c0f3dc1ba1..d9d7b8b4f426 100644 --- a/include/dt-bindings/clock/r9a06g032-sysctrl.h +++ b/include/dt-bindings/clock/r9a06g032-sysctrl.h @@ -74,6 +74,7 @@ #define R9A06G032_CLK_DDRPHY_PCLK 81 /* AKA CLK_REF_SYNC_D4 */ #define R9A06G032_CLK_FW 81 /* AKA CLK_REF_SYNC_D4 */ #define R9A06G032_CLK_CRYPTO 81 /* AKA CLK_REF_SYNC_D4 */ +#define R9A06G032_CLK_WATCHDOG 82 /* AKA CLK_REF_SYNC_D8 */ #define R9A06G032_CLK_A7MP 84 /* AKA DIV_CA7 */ #define R9A06G032_HCLK_CAN0 85 #define R9A06G032_HCLK_CAN1 86 -- cgit v1.2.3 From 4f0b903ded728c505850daf2914bfc08841f0ae6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 23 Feb 2022 17:14:37 +0200 Subject: fsnotify: fix merge with parent's ignored mask fsnotify_parent() does not consider the parent's mark at all unless the parent inode shows interest in events on children and in the specific event. So unless parent added an event to both its mark mask and ignored mask, the event will not be ignored. Fix this by declaring the interest of an object in an event when the event is in either a mark mask or ignored mask. Link: https://lore.kernel.org/r/20220223151438.790268-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 790c31844db5..5f9c960049b0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -601,6 +601,21 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, /* functions used to manipulate the marks attached to inodes */ +/* Get mask for calculating object interest taking ignored mask into account */ +static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) +{ + __u32 mask = mark->mask; + + if (!mark->ignored_mask) + return mask; + + /* + * If mark is interested in ignoring events on children, the object must + * show interest in those events for fsnotify_parent() to notice it. + */ + return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS); +} + /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ -- cgit v1.2.3 From 04e317ba72d07901b03399b3d1525e83424df5b3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 23 Feb 2022 17:14:38 +0200 Subject: fsnotify: optimize FS_MODIFY events with no ignored masks fsnotify() treats FS_MODIFY events specially - it does not skip them even if the FS_MODIFY event does not apear in the object's fsnotify mask. This is because send_to_group() checks if FS_MODIFY needs to clear ignored mask of marks. The common case is that an object does not have any mark with ignored mask and in particular, that it does not have a mark with ignored mask and without the FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag. 
Set FS_MODIFY in object's fsnotify mask during fsnotify_recalc_mask() if object has a mark with an ignored mask and without the FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag and remove the special treatment of FS_MODIFY in fsnotify(), so that FS_MODIFY events could be optimized in the common case. Call fsnotify_recalc_mask() from fanotify after adding or removing an ignored mask from a mark without FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY or when adding the FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag to a mark with ignored mask (the flag cannot be removed by fanotify uapi). Performance results for doing 10000000 write(2)s to tmpfs: vanilla patched without notification mark 25.486+-1.054 24.965+-0.244 with notification mark 30.111+-0.139 26.891+-1.355 So we can see the overhead of notification subsystem has been drastically reduced. Link: https://lore.kernel.org/r/20220223151438.790268-3-amir73il@gmail.com Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5f9c960049b0..0805b74cae44 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -609,6 +609,10 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) if (!mark->ignored_mask) return mask; + /* Interest in FS_MODIFY may be needed for clearing ignored mask */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mask |= FS_MODIFY; + /* * If mark is interested in ignoring events on children, the object must * show interest in those events for fsnotify_parent() to notice it. -- cgit v1.2.3 From 0cc62aed370d91ec3d5cf258e53028c736e88a09 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 08:42:21 +0000 Subject: sizes.h: Add SZ_1T macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today drivers/pci/controller/pci-xgene.c defines SZ_1T Move it into linux/sizes.h so that it can be re-used elsewhere. Link: https://lore.kernel.org/r/575cb7164cf124c75df7cb9242ea7374733942bf.1642752946.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Lorenzo Pieralisi Reviewed-by: Krzysztof Wilczyński Acked-by: Bjorn Helgaas Cc: Toan Le Cc: linux-pci@vger.kernel.org --- include/linux/sizes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 1ac79bcee2bb..84aa448d8bb3 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,6 +47,8 @@ #define SZ_8G _AC(0x200000000, ULL) #define SZ_16G _AC(0x400000000, ULL) #define SZ_32G _AC(0x800000000, ULL) + +#define SZ_1T _AC(0x10000000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ -- cgit v1.2.3 From d9b5ae5c1b241b91480aa30408be12fe91af834a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 23 Feb 2022 18:34:16 +0200 Subject: openvswitch: Fix setting ipv6 fields causing hw csum failure Ipv6 ttl, label and tos fields are modified without first pulling/pushing the ipv6 header, which would have updated the hw csum (if available). This might cause csum validation when sending the packet to the stack, as can be seen in the trace below. Fix this by updating skb->csum if available. 
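The csum_replace() helper added below is what makes this fix cheap. As an illustration only (names here are not the exact openvswitch code), the pattern for rewriting a 32-bit word of a packet that arrived with CHECKSUM_COMPLETE looks like this; the crash trace that prompted the change follows right after.

  #include <linux/skbuff.h>
  #include <net/checksum.h>

  static void sketch_replace_word(struct sk_buff *skb, __be32 *field, __be32 new_val)
  {
  	/* Patch the stored ones' complement sum before touching the data,
  	 * otherwise a later checksum validation sees a stale skb->csum and
  	 * warns with "hw csum failure".
  	 */
  	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		csum_replace(&skb->csum, (__force __wsum)*field,
  			     (__force __wsum)new_val);
  	*field = new_val;
  }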
Trace resulted by ipv6 ttl dec and then sending packet to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]: [295241.900063] s_pf0vf2: hw csum failure [295241.923191] Call Trace: [295241.925728] [295241.927836] dump_stack+0x5c/0x80 [295241.931240] __skb_checksum_complete+0xac/0xc0 [295241.935778] nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack] [295241.953030] nf_conntrack_in+0x498/0x5e0 [nf_conntrack] [295241.958344] __ovs_ct_lookup+0xac/0x860 [openvswitch] [295241.968532] ovs_ct_execute+0x4a7/0x7c0 [openvswitch] [295241.979167] do_execute_actions+0x54a/0xaa0 [openvswitch] [295242.001482] ovs_execute_actions+0x48/0x100 [openvswitch] [295242.006966] ovs_dp_process_packet+0x96/0x1d0 [openvswitch] [295242.012626] ovs_vport_receive+0x6c/0xc0 [openvswitch] [295242.028763] netdev_frame_hook+0xc0/0x180 [openvswitch] [295242.034074] __netif_receive_skb_core+0x2ca/0xcb0 [295242.047498] netif_receive_skb_internal+0x3e/0xc0 [295242.052291] napi_gro_receive+0xba/0xe0 [295242.056231] mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core] [295242.062513] mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core] [295242.067669] mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core] [295242.077958] net_rx_action+0x149/0x3b0 [295242.086762] __do_softirq+0xd7/0x2d6 [295242.090427] irq_exit+0xf7/0x100 [295242.093748] do_IRQ+0x7f/0xd0 [295242.096806] common_interrupt+0xf/0xf [295242.100559] [295242.102750] RIP: 0033:0x7f9022e88cbd [295242.125246] RSP: 002b:00007f9022282b20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffda [295242.132900] RAX: 0000000000000005 RBX: 0000000000000010 RCX: 0000000000000000 [295242.140120] RDX: 00007f9022282ba8 RSI: 00007f9022282a30 RDI: 00007f9014005c30 [295242.147337] RBP: 00007f9014014d60 R08: 0000000000000020 R09: 00007f90254a8340 [295242.154557] R10: 00007f9022282a28 R11: 0000000000000246 R12: 0000000000000000 [295242.161775] R13: 00007f902308c000 R14: 000000000000002b R15: 00007f9022b71f40 Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action") Signed-off-by: Paul Blakey Link: https://lore.kernel.org/r/20220223163416.24096-1-paulb@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/checksum.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 02d0c2d01014..79c67f14c448 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -142,6 +142,11 @@ static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); -- cgit v1.2.3 From 07d743902a1205d54d93c0637c22575811014431 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Wed, 16 Feb 2022 14:52:38 +0530 Subject: dt-bindings: Add HDA support for Tegra234 Add hda clocks, memory ,power and reset binding entries for Tegra234. 
Signed-off-by: Mohan Kumar Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 4 ++++ include/dt-bindings/memory/tegra234-mc.h | 5 +++++ include/dt-bindings/power/tegra234-powergate.h | 1 + include/dt-bindings/reset/tegra234-reset.h | 2 ++ 4 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 5c201a8278ff..0c38769c4a76 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -136,4 +136,8 @@ #define TEGRA234_CLK_PLLC4 237U /** @brief 32K input clock provided by PMIC */ #define TEGRA234_CLK_CLK_32K 289U +/** @brief CLK_RST_CONTROLLER_AZA2XBITCLK_OUT_SWITCH_DIVIDER switch divider output (aza_2xbitclk) */ +#define TEGRA234_CLK_AZA_2XBIT 457U +/** @brief aza_2xbitclk / 2 (aza_bitclk) */ +#define TEGRA234_CLK_AZA_BIT 458U #endif diff --git a/include/dt-bindings/memory/tegra234-mc.h b/include/dt-bindings/memory/tegra234-mc.h index 42661dda1c53..672b953cadd3 100644 --- a/include/dt-bindings/memory/tegra234-mc.h +++ b/include/dt-bindings/memory/tegra234-mc.h @@ -10,6 +10,7 @@ /* NISO0 stream IDs */ #define TEGRA234_SID_APE 0x02 +#define TEGRA234_SID_HDA 0x03 /* NISO1 stream IDs */ #define TEGRA234_SID_SDMMC4 0x02 @@ -19,6 +20,10 @@ * memory client IDs */ +/* High-definition audio (HDA) read clients */ +#define TEGRA234_MEMORY_CLIENT_HDAR 0x15 +/* High-definition audio (HDA) write clients */ +#define TEGRA234_MEMORY_CLIENT_HDAW 0x35 /* sdmmcd memory read client */ #define TEGRA234_MEMORY_CLIENT_SDMMCRAB 0x63 /* sdmmcd memory write client */ diff --git a/include/dt-bindings/power/tegra234-powergate.h b/include/dt-bindings/power/tegra234-powergate.h index 8e28fcbdc038..a635a8b53810 100644 --- a/include/dt-bindings/power/tegra234-powergate.h +++ b/include/dt-bindings/power/tegra234-powergate.h @@ -5,5 +5,6 @@ #define __ABI_MACH_T234_POWERGATE_T234_H_ #define TEGRA234_POWER_DOMAIN_AUD 2U +#define TEGRA234_POWER_DOMAIN_DISP 3U #endif diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index ba390b86361d..178e73a687f0 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -10,6 +10,8 @@ * @brief Identifiers for Resets controllable by firmware * @{ */ +#define TEGRA234_RESET_HDA 20U +#define TEGRA234_RESET_HDACODEC 21U #define TEGRA234_RESET_I2C1 24U #define TEGRA234_RESET_I2C2 29U #define TEGRA234_RESET_I2C3 30U -- cgit v1.2.3 From d06a171e07bc6aa524b402c754611ef08a34b131 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Sat, 5 Feb 2022 21:51:35 +0530 Subject: dt-bindings: Add Tegra234 PCIe clocks and resets Add the clocks and resets used by the PCIe hardware found on Tegra234 SoCs. 
Signed-off-by: Vidya Sagar Acked-by: Rob Herring Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 22 ++++++++++++++++++++++ include/dt-bindings/reset/tegra234-reset.h | 27 ++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 0c38769c4a76..8cae969e8cba 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -130,8 +130,30 @@ #define TEGRA234_CLK_SYNC_I2S6 150U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ #define TEGRA234_CLK_UARTA 155U +/** @brief output of gate CLK_ENB_PEX1_CORE_6 */ +#define TEGRA234_CLK_PEX1_C6_CORE 161U +/** @brief output of gate CLK_ENB_PEX2_CORE_7 */ +#define TEGRA234_CLK_PEX2_C7_CORE 171U +/** @brief output of gate CLK_ENB_PEX2_CORE_8 */ +#define TEGRA234_CLK_PEX2_C8_CORE 172U +/** @brief output of gate CLK_ENB_PEX2_CORE_9 */ +#define TEGRA234_CLK_PEX2_C9_CORE 173U +/** @brief output of gate CLK_ENB_PEX2_CORE_10 */ +#define TEGRA234_CLK_PEX2_C10_CORE 187U /** @brief CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC_LEGACY_TM switch divider output */ #define TEGRA234_CLK_SDMMC_LEGACY_TM 219U +/** @brief output of gate CLK_ENB_PEX0_CORE_0 */ +#define TEGRA234_CLK_PEX0_C0_CORE 220U +/** @brief output of gate CLK_ENB_PEX0_CORE_1 */ +#define TEGRA234_CLK_PEX0_C1_CORE 221U +/** @brief output of gate CLK_ENB_PEX0_CORE_2 */ +#define TEGRA234_CLK_PEX0_C2_CORE 222U +/** @brief output of gate CLK_ENB_PEX0_CORE_3 */ +#define TEGRA234_CLK_PEX0_C3_CORE 223U +/** @brief output of gate CLK_ENB_PEX0_CORE_4 */ +#define TEGRA234_CLK_PEX0_C4_CORE 224U +/** @brief output of gate CLK_ENB_PEX1_CORE_5 */ +#define TEGRA234_CLK_PEX1_C5_CORE 225U /** @brief PLL controlled by CLK_RST_CONTROLLER_PLLC4_BASE */ #define TEGRA234_CLK_PLLC4 237U /** @brief 32K input clock provided by PMIC */ diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index 178e73a687f0..1362cd5e03f0 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. */ +/* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. 
*/ #ifndef DT_BINDINGS_RESET_TEGRA234_RESET_H #define DT_BINDINGS_RESET_TEGRA234_RESET_H @@ -10,9 +10,18 @@ * @brief Identifiers for Resets controllable by firmware * @{ */ +#define TEGRA234_RESET_PEX1_CORE_6 11U +#define TEGRA234_RESET_PEX1_CORE_6_APB 12U +#define TEGRA234_RESET_PEX1_COMMON_APB 13U +#define TEGRA234_RESET_PEX2_CORE_7 14U +#define TEGRA234_RESET_PEX2_CORE_7_APB 15U #define TEGRA234_RESET_HDA 20U #define TEGRA234_RESET_HDACODEC 21U #define TEGRA234_RESET_I2C1 24U +#define TEGRA234_RESET_PEX2_CORE_8 25U +#define TEGRA234_RESET_PEX2_CORE_8_APB 26U +#define TEGRA234_RESET_PEX2_CORE_9 27U +#define TEGRA234_RESET_PEX2_CORE_9_APB 28U #define TEGRA234_RESET_I2C2 29U #define TEGRA234_RESET_I2C3 30U #define TEGRA234_RESET_I2C4 31U @@ -20,6 +29,9 @@ #define TEGRA234_RESET_I2C7 33U #define TEGRA234_RESET_I2C8 34U #define TEGRA234_RESET_I2C9 35U +#define TEGRA234_RESET_PEX2_CORE_10 56U +#define TEGRA234_RESET_PEX2_CORE_10_APB 57U +#define TEGRA234_RESET_PEX2_COMMON_APB 58U #define TEGRA234_RESET_PWM1 68U #define TEGRA234_RESET_PWM2 69U #define TEGRA234_RESET_PWM3 70U @@ -30,6 +42,19 @@ #define TEGRA234_RESET_PWM8 75U #define TEGRA234_RESET_SDMMC4 85U #define TEGRA234_RESET_UARTA 100U +#define TEGRA234_RESET_PEX0_CORE_0 116U +#define TEGRA234_RESET_PEX0_CORE_1 117U +#define TEGRA234_RESET_PEX0_CORE_2 118U +#define TEGRA234_RESET_PEX0_CORE_3 119U +#define TEGRA234_RESET_PEX0_CORE_4 120U +#define TEGRA234_RESET_PEX0_CORE_0_APB 121U +#define TEGRA234_RESET_PEX0_CORE_1_APB 122U +#define TEGRA234_RESET_PEX0_CORE_2_APB 123U +#define TEGRA234_RESET_PEX0_CORE_3_APB 124U +#define TEGRA234_RESET_PEX0_CORE_4_APB 125U +#define TEGRA234_RESET_PEX0_COMMON_APB 126U +#define TEGRA234_RESET_PEX1_CORE_5 129U +#define TEGRA234_RESET_PEX1_CORE_5_APB 130U /** @} */ -- cgit v1.2.3 From 6460278f6faf31cae753f0e94c6ba8483b272612 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Sat, 5 Feb 2022 21:51:36 +0530 Subject: dt-bindings: power: Add Tegra234 PCIe power domains Add power domain IDs for the four PCIe power partitions found on Tegra234. Signed-off-by: Vidya Sagar Acked-by: Rob Herring Signed-off-by: Thierry Reding --- include/dt-bindings/power/tegra234-powergate.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/tegra234-powergate.h b/include/dt-bindings/power/tegra234-powergate.h index a635a8b53810..f610eee9bce8 100644 --- a/include/dt-bindings/power/tegra234-powergate.h +++ b/include/dt-bindings/power/tegra234-powergate.h @@ -6,5 +6,17 @@ #define TEGRA234_POWER_DOMAIN_AUD 2U #define TEGRA234_POWER_DOMAIN_DISP 3U +#define TEGRA234_POWER_DOMAIN_PCIEX8A 5U +#define TEGRA234_POWER_DOMAIN_PCIEX4A 6U +#define TEGRA234_POWER_DOMAIN_PCIEX4BA 7U +#define TEGRA234_POWER_DOMAIN_PCIEX4BB 8U +#define TEGRA234_POWER_DOMAIN_PCIEX1A 9U +#define TEGRA234_POWER_DOMAIN_PCIEX4CA 13U +#define TEGRA234_POWER_DOMAIN_PCIEX4CB 14U +#define TEGRA234_POWER_DOMAIN_PCIEX4CC 15U +#define TEGRA234_POWER_DOMAIN_PCIEX8B 16U +#define TEGRA234_POWER_DOMAIN_MGBEA 17U +#define TEGRA234_POWER_DOMAIN_MGBEB 18U +#define TEGRA234_POWER_DOMAIN_MGBEC 19U #endif -- cgit v1.2.3 From a4ad66da3fccebdcbd53d55c035d5851b73f8bcb Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Sat, 5 Feb 2022 21:51:37 +0530 Subject: dt-bindings: memory: Add Tegra234 PCIe memory Add the memory client and stream ID definitions for the PCIe hardware found on Tegra234 SoCs. 
Signed-off-by: Vidya Sagar Acked-by: Rob Herring Signed-off-by: Thierry Reding --- include/dt-bindings/memory/tegra234-mc.h | 63 ++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/memory/tegra234-mc.h b/include/dt-bindings/memory/tegra234-mc.h index 672b953cadd3..e3b0e9da295d 100644 --- a/include/dt-bindings/memory/tegra234-mc.h +++ b/include/dt-bindings/memory/tegra234-mc.h @@ -11,9 +11,20 @@ /* NISO0 stream IDs */ #define TEGRA234_SID_APE 0x02 #define TEGRA234_SID_HDA 0x03 +#define TEGRA234_SID_PCIE0 0x12 +#define TEGRA234_SID_PCIE4 0x13 +#define TEGRA234_SID_PCIE5 0x14 +#define TEGRA234_SID_PCIE6 0x15 +#define TEGRA234_SID_PCIE9 0x1f /* NISO1 stream IDs */ #define TEGRA234_SID_SDMMC4 0x02 +#define TEGRA234_SID_PCIE1 0x05 +#define TEGRA234_SID_PCIE2 0x06 +#define TEGRA234_SID_PCIE3 0x07 +#define TEGRA234_SID_PCIE7 0x08 +#define TEGRA234_SID_PCIE8 0x09 +#define TEGRA234_SID_PCIE10 0x0b #define TEGRA234_SID_BPMP 0x10 /* @@ -22,8 +33,34 @@ /* High-definition audio (HDA) read clients */ #define TEGRA234_MEMORY_CLIENT_HDAR 0x15 +/* PCIE6 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE6AR 0x28 +/* PCIE6 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE6AW 0x29 +/* PCIE7 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE7AR 0x2a +/* PCIE7 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE7AW 0x30 +/* PCIE8 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE8AR 0x32 /* High-definition audio (HDA) write clients */ #define TEGRA234_MEMORY_CLIENT_HDAW 0x35 +/* PCIE8 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE8AW 0x3b +/* PCIE9 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE9AR 0x3c +/* PCIE6r1 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE6AR1 0x3d +/* PCIE9 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE9AW 0x3e +/* PCIE10 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE10AR 0x3f +/* PCIE10 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE10AW 0x40 +/* PCIE10r1 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE10AR1 0x48 +/* PCIE7r1 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE7AR1 0x49 /* sdmmcd memory read client */ #define TEGRA234_MEMORY_CLIENT_SDMMCRAB 0x63 /* sdmmcd memory write client */ @@ -40,5 +77,31 @@ #define TEGRA234_MEMORY_CLIENT_APEDMAR 0x9f /* APEDMA write client */ #define TEGRA234_MEMORY_CLIENT_APEDMAW 0xa0 +/* PCIE0 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE0R 0xd8 +/* PCIE0 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE0W 0xd9 +/* PCIE1 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE1R 0xda +/* PCIE1 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE1W 0xdb +/* PCIE2 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE2AR 0xdc +/* PCIE2 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE2AW 0xdd +/* PCIE3 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE3R 0xde +/* PCIE3 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE3W 0xdf +/* PCIE4 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE4R 0xe0 +/* PCIE4 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE4W 0xe1 +/* PCIE5 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE5R 0xe2 +/* PCIE5 write clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE5W 0xe3 +/* PCIE5r1 read clients */ +#define TEGRA234_MEMORY_CLIENT_PCIE5R1 0xef #endif -- cgit v1.2.3 From 29fb608396d6a62c1b85acc421ad7a4399085b9f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 14 Feb 2022 17:59:38 -0800 Subject: Bluetooth: Fix bt_skb_sendmmsg not allocating partial chunks Since 
bt_skb_sendmmsg can be used with the likes of SOCK_STREAM, it shall return the partial chunks it could allocate instead of freeing everything as otherwise it can cause problems like below. Fixes: 81be03e026dc ("Bluetooth: RFCOMM: Replace use of memcpy_from_msg with bt_skb_sendmmsg") Reported-by: Paul Menzel Link: https://lore.kernel.org/r/d7206e12-1b99-c3be-84f4-df22af427ef5@molgen.mpg.de BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215594 Signed-off-by: Luiz Augusto von Dentz Tested-by: Paul Menzel (Nokia N9 (MeeGo/Harmattan)) Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 4b3d0b16c185..a647e5fabdbd 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -506,8 +506,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(tmp)) { - kfree_skb(skb); - return tmp; + return skb; } len -= tmp->len; -- cgit v1.2.3 From d148363684a41162a835c1803c02cfd90b22b2c6 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 20 Feb 2022 23:30:01 +0100 Subject: dt-bindings: power: rpmpd: Add MSM8226 to rpmpd binding Add compatible and constants for the power domains exposed by the MSM8226 RPM. Signed-off-by: Luca Weiss Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220220223004.507739-1-luca@z3ntu.xyz --- include/dt-bindings/power/qcom-rpmpd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index edfc1ff2acb3..c002cc6ddf55 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -139,6 +139,11 @@ #define MDM9607_VDDMX_AO 4 #define MDM9607_VDDMX_VFL 5 +/* MSM8226 Power Domain Indexes */ +#define MSM8226_VDDCX 0 +#define MSM8226_VDDCX_AO 1 +#define MSM8226_VDDCX_VFC 2 + /* MSM8939 Power Domains */ #define MSM8939_VDDMDCX 0 #define MSM8939_VDDMDCX_AO 1 -- cgit v1.2.3 From a56a1138cbd85e4d565356199d60e1cb94e5a77a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 17 Feb 2022 13:10:38 -0800 Subject: Bluetooth: hci_sync: Fix not using conn_timeout When using hci_le_create_conn_sync, it shall wait for the conn_timeout since the connection complete may take longer than just 2 seconds. Also fix the masking of HCI_EV_LE_ENHANCED_CONN_COMPLETE and HCI_EV_LE_CONN_COMPLETE so they are never both set, so we can predict which one the controller will use in case of HCI_OP_LE_CREATE_CONN.
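For illustration, the intended effect on the LE event mask setup can be sketched as below. This is schematic only, not the exact hunk from net/bluetooth/hci_sync.c: the events[] array is the LE event mask byte array built there, and the bit positions follow the LE Meta event mask layout in the Core specification.

	if (use_enhanced_conn_complete(hdev))
		events[1] |= 0x02;	/* LE Enhanced Connection Complete */
	else
		events[0] |= 0x01;	/* LE Connection Complete */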
Fixes: 6cd29ec6ae5e3 ("Bluetooth: hci_sync: Wait for proper events when connecting LE") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 586f69d084a2..e336e9c1dda4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1489,6 +1489,14 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: + * + * C24: Mandatory if the LE Controller supports Connection State and either + * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported + */ +#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ + ext_adv_capable(dev)) + /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 -- cgit v1.2.3 From 46a76724e4c93bb1cda8ee11276001a92d1f7987 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:44 +0200 Subject: net: dsa: rename references to "lag" as "lag_dev" In preparation of converting struct net_device *dp->lag_dev into a struct dsa_lag *dp->lag, we need to rename, for consistency purposes, all occurrences of the "lag" variable in the DSA core to "lag_dev". Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index f13de2d8aef3..ef7f446cbdf4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -182,12 +182,12 @@ static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, } static inline int dsa_lag_id(struct dsa_switch_tree *dst, - struct net_device *lag) + struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag) + if (dsa_lag_dev(dst, id) == lag_dev) return id; } @@ -966,10 +966,10 @@ struct dsa_switch_ops { int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag, + int port, struct net_device *lag_dev, struct netdev_lag_upper_info *info); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag); + int port, struct net_device *lag_dev); /* * PTP functionality @@ -1041,10 +1041,10 @@ struct dsa_switch_ops { */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, - struct net_device *lag, + struct net_device *lag_dev, struct netdev_lag_upper_info *info); int (*port_lag_leave)(struct dsa_switch *ds, int port, - struct net_device *lag); + struct net_device *lag_dev); /* * HSR integration -- cgit v1.2.3 From 3d4a0a2a46ab8ff8897dfd6324edee5e8184d2c5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:47 +0200 Subject: net: dsa: make LAG IDs one-based The DSA LAG API will be changed to become more similar with the bridge data structures, where struct dsa_bridge holds an unsigned int num, which is generated by DSA and is one-based. We have a similar thing going with the DSA LAG, except that isn't stored anywhere, it is calculated dynamically by dsa_lag_id() by iterating through dst->lags. 
The idea of encoding an invalid (or not requested) LAG ID as zero for the purpose of simplifying checks in drivers means that the LAG IDs passed by DSA to drivers need to be one-based too. So back-and-forth conversion is needed when indexing the dst->lags array, as well as in drivers which assume a zero-based index. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ef7f446cbdf4..3ae93adda25d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -163,9 +163,10 @@ struct dsa_switch_tree { unsigned int last_switch; }; +/* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ - for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ - if ((_dst)->lags[(_id)]) + for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ @@ -178,7 +179,8 @@ struct dsa_switch_tree { static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, unsigned int id) { - return dst->lags[id]; + /* DSA LAG IDs are one-based, dst->lags is zero-based */ + return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, -- cgit v1.2.3 From dedd6a009f4191989bee83c1faf66728648a223f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:49 +0200 Subject: net: dsa: create a dsa_lag structure The main purpose of this change is to create a data structure for a LAG as seen by DSA. This is similar to what we have for bridging - we pass a copy of this structure by value to ->port_lag_join and ->port_lag_leave. For now we keep the lag_dev, id and a reference count in it. Future patches will add a list of FDB entries for the LAG (these also need to be refcounted to work properly). The LAG structure is created using dsa_port_lag_create() and destroyed using dsa_port_lag_destroy(), just like we have for bridging. Because now, the dsa_lag itself is refcounted, we can simplify dsa_lag_map() and dsa_lag_unmap(). These functions need to keep a LAG in the dst->lags array only as long as at least one port uses it. The refcounting logic inside those functions can be removed now - they are called only when we should perform the operation. dsa_lag_dev() is renamed to dsa_lag_by_id() and now returns the dsa_lag structure instead of the lag_dev net_device. dsa_lag_foreach_port() now takes the dsa_lag structure as argument. dst->lags holds an array of dsa_lag structures. dsa_lag_map() now also saves the dsa_lag->id value, so that linear walking of dst->lags in drivers using dsa_lag_id() is no longer necessary. They can just look at lag.id. dsa_port_lag_id_get() is a helper, similar to dsa_port_bridge_num_get(), which can be used by drivers to get the LAG ID assigned by DSA to a given port. 
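For illustration, a driver-side LAG join handler under this API could look roughly like the sketch below; the foo_* names are placeholders, only the struct dsa_lag layout and the ops signature come from this patch:

	static int foo_port_lag_join(struct dsa_switch *ds, int port,
				     struct dsa_lag lag,
				     struct netdev_lag_upper_info *info)
	{
		/* lag.id is allocated by the DSA core (one-based) and
		 * lag.dev is the bonding/team net_device backing this LAG.
		 */
		return foo_set_logical_port(ds, port, lag.id);
	}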
Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 3ae93adda25d..81ed34998416 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,6 +116,12 @@ struct dsa_netdevice_ops { #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) +struct dsa_lag { + struct net_device *dev; + unsigned int id; + refcount_t refcount; +}; + struct dsa_switch_tree { struct list_head list; @@ -134,7 +140,7 @@ struct dsa_switch_tree { /* Maps offloaded LAG netdevs to a zero-based linear ID for * drivers that need it. */ - struct net_device **lags; + struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -170,14 +176,14 @@ struct dsa_switch_tree { #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ - if ((_dp)->lag_dev == (_lag)) + if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) -static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, - unsigned int id) +static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, + unsigned int id) { /* DSA LAG IDs are one-based, dst->lags is zero-based */ return dst->lags[id - 1]; @@ -189,8 +195,10 @@ static inline int dsa_lag_id(struct dsa_switch_tree *dst, unsigned int id; dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag_dev) - return id; + struct dsa_lag *lag = dsa_lag_by_id(dst, id); + + if (lag->dev == lag_dev) + return lag->id; } return -ENODEV; @@ -293,7 +301,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; - struct net_device *lag_dev; + struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; @@ -643,14 +651,30 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) return dp->vlan_filtering; } +static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) +{ + return dp->lag ? dp->lag->id : 0; +} + +static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) +{ + return dp->lag ? 
dp->lag->dev : NULL; +} + +static inline bool dsa_port_offloads_lag(struct dsa_port *dp, + const struct dsa_lag *lag) +{ + return dsa_port_lag_dev_get(dp) == lag->dev; +} + static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { if (!dp->bridge) return NULL; - if (dp->lag_dev) - return dp->lag_dev; + if (dp->lag) + return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; @@ -968,10 +992,10 @@ struct dsa_switch_ops { int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag_dev, + int port, struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag_dev); + int port, struct dsa_lag lag); /* * PTP functionality @@ -1043,10 +1067,10 @@ struct dsa_switch_ops { */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, - struct net_device *lag_dev, + struct dsa_lag lag, struct netdev_lag_upper_info *info); int (*port_lag_leave)(struct dsa_switch *ds, int port, - struct net_device *lag_dev); + struct dsa_lag lag); /* * HSR integration -- cgit v1.2.3 From ec638740fce990ad2b9af43ead8088d6d6eb2145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:50 +0200 Subject: net: switchdev: remove lag_mod_cb from switchdev_handle_fdb_event_to_device When the switchdev_handle_fdb_event_to_device() event replication helper was created, my original thought was that FDB events on LAG interfaces should most likely be special-cased, not just replicated towards all switchdev ports beneath that LAG. So this replication helper currently does not recurse through switchdev lower interfaces of LAG bridge ports, but rather calls the lag_mod_cb() if that was provided. No switchdev driver uses this helper for FDB events on LAG interfaces yet, so that was an assumption which was yet to be tested. It is certainly usable for that purpose, as my RFC series shows: https://patchwork.kernel.org/project/netdevbpf/cover/20220210125201.2859463-1-vladimir.oltean@nxp.com/ however this approach is slightly convoluted because: - the switchdev driver gets a "dev" that isn't its own net device, but rather the LAG net device. It must call switchdev_lower_dev_find(dev) in order to get a handle of any of its own net devices (the ones that pass check_cb). - in order for FDB entries on LAG ports to be correctly refcounted per the number of switchdev ports beneath that LAG, we haven't escaped the need to iterate through the LAG's lower interfaces. Except that is now the responsibility of the switchdev driver, because the replication helper just stopped half-way. So, even though yes, FDB events on LAG bridge ports must be special-cased, in the end it's simpler to let switchdev_handle_fdb_* just iterate through the LAG port's switchdev lowers, and let the switchdev driver figure out that those physical ports are under a LAG. The switchdev_handle_fdb_event_to_device() helper takes a "foreign_dev_check" callback so it can figure out whether @dev can autonomously forward to @foreign_dev. DSA fills this method properly: if the LAG is offloaded by another port in the same tree as @dev, then it isn't foreign. If it is a software LAG, it is foreign - forwarding happens in software. Whether an interface is foreign or not decides whether the replication helper will go through the LAG's switchdev lowers or not. 
Since the lan966x doesn't properly fill this out, FDB events on software LAG uppers will get called. By changing lan966x_foreign_dev_check(), we can suppress them. Whereas DSA will now start receiving FDB events for its offloaded LAG uppers, so we need to return -EOPNOTSUPP, since we currently don't do the right thing for them. Cc: Horatiu Vultur Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/net/switchdev.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index c32e1c8f79ec..3e424d40fae3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -313,10 +313,7 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)); + const struct switchdev_notifier_fdb_info *fdb_info)); int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -443,10 +440,7 @@ switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; } -- cgit v1.2.3 From e212fa7c54184b7b1f88990bd328b23b567cbf41 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:53 +0200 Subject: net: dsa: support FDB events on offloaded LAG interfaces This change introduces support for installing static FDB entries towards a bridge port that is a LAG of multiple DSA switch ports, as well as support for filtering towards the CPU local FDB entries emitted for LAG interfaces that are bridge ports. Conceptually, host addresses on LAG ports are identical to what we do for plain bridge ports. Whereas FDB entries _towards_ a LAG can't simply be replicated towards all member ports like we do for multicast, or VLAN. Instead we need new driver API. Hardware usually considers a LAG to be a "logical port", and sets the entire LAG as the forwarding destination. The physical egress port selection within the LAG is made by hashing policy, as usual. To represent the logical port corresponding to the LAG, we pass by value a copy of the dsa_lag structure to all switches in the tree that have at least one port in that LAG. To illustrate why a refcounted list of FDB entries is needed in struct dsa_lag, it is enough to say that: - a LAG may be a bridge port and may therefore receive FDB events even while it isn't yet offloaded by any DSA interface - DSA interfaces may be removed from a LAG while that is a bridge port; we don't want FDB entries lingering around, but we don't want to remove entries that are still in use, either For all the cases below to work, the idea is to always keep an FDB entry on a LAG with a reference count equal to the DSA member ports. 
So: - if a port joins a LAG, it requests the bridge to replay the FDB, and the FDB entries get created, or their refcount gets bumped by one - if a port leaves a LAG, the FDB replay deletes or decrements refcount by one - if an FDB is installed towards a LAG with ports already present, that entry is created (if it doesn't exist) and its refcount is bumped by the amount of ports already present in the LAG echo "Adding FDB entry to bond with existing ports" ip link del bond0 ip link add bond0 type bond mode 802.3ad ip link set swp1 down && ip link set swp1 master bond0 && ip link set swp1 up ip link set swp2 down && ip link set swp2 master bond0 && ip link set swp2 up ip link del br0 ip link add br0 type bridge ip link set bond0 master br0 bridge fdb add dev bond0 00:01:02:03:04:05 master static ip link del br0 ip link del bond0 echo "Adding FDB entry to empty bond" ip link del bond0 ip link add bond0 type bond mode 802.3ad ip link del br0 ip link add br0 type bridge ip link set bond0 master br0 bridge fdb add dev bond0 00:01:02:03:04:05 master static ip link set swp1 down && ip link set swp1 master bond0 && ip link set swp1 up ip link set swp2 down && ip link set swp2 master bond0 && ip link set swp2 up ip link del br0 ip link del bond0 echo "Adding FDB entry to empty bond, then removing ports one by one" ip link del bond0 ip link add bond0 type bond mode 802.3ad ip link del br0 ip link add br0 type bridge ip link set bond0 master br0 bridge fdb add dev bond0 00:01:02:03:04:05 master static ip link set swp1 down && ip link set swp1 master bond0 && ip link set swp1 up ip link set swp2 down && ip link set swp2 master bond0 && ip link set swp2 up ip link set swp1 nomaster ip link set swp2 nomaster ip link del br0 ip link del bond0 Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 81ed34998416..01faba89c987 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -119,6 +119,8 @@ struct dsa_netdevice_ops { struct dsa_lag { struct net_device *dev; unsigned int id; + struct mutex fdb_lock; + struct list_head fdbs; refcount_t refcount; }; @@ -944,6 +946,10 @@ struct dsa_switch_ops { const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); + int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid); + int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid); /* * Multicast database -- cgit v1.2.3 From 961d8b699070070fb3a9639af61641a52c8e49ef Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 23 Feb 2022 16:00:54 +0200 Subject: net: dsa: felix: support FDB entries on offloaded LAG interfaces This adds the logic in the Felix DSA driver and Ocelot switch library. For Ocelot switches, the DEST_IDX that is the output of the MAC table lookup is a logical port (equal to physical port, if no LAG is used, or a dynamically allocated number otherwise). 
The allocation we have in place for LAG IDs is different from DSA's, so we can't use that: - DSA allocates a continuous range of LAG IDs starting from 1 - Ocelot appears to require that physical ports and LAG IDs are in the same space of [0, num_phys_ports), and additionally, ports that aren't in a LAG must have physical port id == logical port id The implication is that an FDB entry towards a LAG might need to be deleted and reinstalled when the LAG ID changes. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 78f56502bc09..dd4fc34d2992 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -635,6 +635,13 @@ enum macaccess_entry_type { #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) +struct ocelot_lag_fdb { + unsigned char addr[ETH_ALEN]; + u16 vid; + struct net_device *bond; + struct list_head list; +}; + struct ocelot_port { struct ocelot *ocelot; @@ -690,6 +697,7 @@ struct ocelot { struct list_head vlans; struct list_head traps; + struct list_head lag_fdbs; /* Switches like VSC9959 have flooding per traffic class */ int num_flooding_pgids; @@ -866,6 +874,10 @@ int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid); int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid); +int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid); +int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid); int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged, struct netlink_ext_ack *extack); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, -- cgit v1.2.3 From 7bbb765b73496699a165d505ecdce962f903b422 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 23 Feb 2022 17:57:40 +0000 Subject: net/tcp: Merge TCP-MD5 inbound callbacks The functions do essentially the same work to verify TCP-MD5 sign. Code can be merged into one family-independent function in order to reduce copy'n'paste and generated code. Later with TCP-AO option added, this will allow to create one function that's responsible for segment verification, that will have all the different checks for MD5/AO/non-signed packets, which in turn will help to see checks for all corner-cases in one function, rather than spread around different families and functions. 
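As an illustration only (iph, dif, sdif and the discard label stand for the surrounding IPv4 receive path and are shown schematically; the prototype is the one added below), a family-independent caller ends up looking roughly like:

	enum skb_drop_reason reason;

	if (tcp_inbound_md5_hash(sk, skb, &reason, &iph->saddr, &iph->daddr,
				 AF_INET, dif, sdif))
		goto discard_and_relse;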
Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Signed-off-by: Dmitry Safonov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220223175740.452397-1-dima@arista.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 04f4650e0ff0..479a27777ad6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1674,6 +1674,11 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family); } +bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + enum skb_drop_reason *reason, + const void *saddr, const void *daddr, + int family, int dif, int sdif); + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else @@ -1683,6 +1688,14 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, { return NULL; } +static inline bool tcp_inbound_md5_hash(const struct sock *sk, + const struct sk_buff *skb, + enum skb_drop_reason *reason, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + return false; +} #define tcp_twsk_md5_key(twsk) NULL #endif -- cgit v1.2.3 From 34737e26980341519d00e84711fe619f9f47e79c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Feb 2022 08:50:00 +0100 Subject: uaccess: add generic __{get,put}_kernel_nofault Nine architectures are still missing __{get,put}_kernel_nofault: alpha, ia64, microblaze, nds32, nios2, openrisc, sh, sparc32, xtensa. Add a generic version that lets everything use the normal copy_{from,to}_kernel_nofault() code based on these, removing the last use of get_fs()/set_fs() from architecture-independent code. Reviewed-by: Christoph Hellwig Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/asm-generic/uaccess.h | 2 -- include/linux/uaccess.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 10ffa8b5c117..0870fa11a7c5 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -77,8 +77,6 @@ do { \ goto err_label; \ } while (0) -#define HAVE_GET_KERNEL_NOFAULT 1 - static inline __must_check unsigned long raw_copy_from_user(void *to, const void __user * from, unsigned long n) { diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ac0394087f7d..67e9bc94dc40 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -368,6 +368,25 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); +#ifndef __get_kernel_nofault +#define __get_kernel_nofault(dst, src, type, label) \ +do { \ + type __user *p = (type __force __user *)(src); \ + type data; \ + if (__get_user(data, p)) \ + goto label; \ + *(type *)dst = data; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, label) \ +do { \ + type __user *p = (type __force __user *)(dst); \ + type data = *(type *)src; \ + if (__put_user(data, p)) \ + goto label; \ +} while (0) +#endif + /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable -- cgit v1.2.3 From 12700c17fc286149324f92d6d380bc48e43f253d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Feb 2022 17:55:04 +0100 Subject: uaccess: generalize access_ok() There are many different ways that access_ok() is defined across architectures, but in the end, they all just compare 
against the user_addr_max() value or they accept anything. Provide one definition that works for most architectures, checking against TASK_SIZE_MAX for user processes or skipping the check inside of uaccess_kernel() sections. For architectures without CONFIG_SET_FS(), this should be the fastest check, as it comes down to a single comparison of a pointer against a compile-time constant, while the architecture specific versions tend to do something more complex for historic reasons or get something wrong. Type checking for __user annotations is handled inconsistently across architectures, but this is easily simplified as well by using an inline function that takes a 'const void __user *' argument. A handful of callers need an extra __user annotation for this. Some architectures had trick to use 33-bit or 65-bit arithmetic on the addresses to calculate the overflow, however this simpler version uses fewer registers, which means it can produce better object code in the end despite needing a second (statically predicted) branch. Reviewed-by: Christoph Hellwig Acked-by: Mark Rutland [arm64, asm-generic] Acked-by: Geert Uytterhoeven Acked-by: Stafford Horne Acked-by: Dinh Nguyen Signed-off-by: Arnd Bergmann --- include/asm-generic/access_ok.h | 60 +++++++++++++++++++++++++++++++++++++++++ include/asm-generic/uaccess.h | 21 +-------------- include/linux/uaccess.h | 7 ----- 3 files changed, 61 insertions(+), 27 deletions(-) create mode 100644 include/asm-generic/access_ok.h (limited to 'include') diff --git a/include/asm-generic/access_ok.h b/include/asm-generic/access_ok.h new file mode 100644 index 000000000000..d38cc5dad65b --- /dev/null +++ b/include/asm-generic/access_ok.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_ACCESS_OK_H__ +#define __ASM_GENERIC_ACCESS_OK_H__ + +/* + * Checking whether a pointer is valid for user space access. + * These definitions work on most architectures, but overrides can + * be used where necessary. + */ + +/* + * architectures with compat tasks have a variable TASK_SIZE and should + * override this to a constant. + */ +#ifndef TASK_SIZE_MAX +#define TASK_SIZE_MAX TASK_SIZE +#endif + +#ifndef uaccess_kernel +#ifdef CONFIG_SET_FS +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) +#else +#define uaccess_kernel() (0) +#endif +#endif + +#ifndef user_addr_max +#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE_MAX) +#endif + +#ifndef __access_ok +/* + * 'size' is a compile-time constant for most callers, so optimize for + * this case to turn the check into a single comparison against a constant + * limit and catch all possible overflows. + * On architectures with separate user address space (m68k, s390, parisc, + * sparc64) or those without an MMU, this should always return true. + * + * This version was originally contributed by Jonas Bonn for the + * OpenRISC architecture, and was found to be the most efficient + * for constant 'size' and 'limit' values. 
+ */ +static inline int __access_ok(const void __user *ptr, unsigned long size) +{ + unsigned long limit = user_addr_max(); + unsigned long addr = (unsigned long)ptr; + + if (IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) || + !IS_ENABLED(CONFIG_MMU)) + return true; + + return (size <= limit) && (addr <= (limit - size)); +} +#endif + +#ifndef access_ok +#define access_ok(addr, size) likely(__access_ok(addr, size)) +#endif + +#endif diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 0870fa11a7c5..ebc685dc8d74 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -114,28 +114,9 @@ static inline void set_fs(mm_segment_t fs) } #endif -#ifndef uaccess_kernel -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -#endif - -#ifndef user_addr_max -#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE) -#endif - #endif /* CONFIG_SET_FS */ -#define access_ok(addr, size) __access_ok((unsigned long)(addr),(size)) - -/* - * The architecture should really override this if possible, at least - * doing a check on the get_fs() - */ -#ifndef __access_ok -static inline int __access_ok(unsigned long addr, unsigned long size) -{ - return 1; -} -#endif +#include /* * These are the main single-value transfer routines. They automatically diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 67e9bc94dc40..2c31667e62e0 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -33,13 +33,6 @@ typedef struct { /* empty dummy */ } mm_segment_t; -#ifndef TASK_SIZE_MAX -#define TASK_SIZE_MAX TASK_SIZE -#endif - -#define uaccess_kernel() (false) -#define user_addr_max() (TASK_SIZE_MAX) - static inline mm_segment_t force_uaccess_begin(void) { return (mm_segment_t) { }; -- cgit v1.2.3 From 967747bbc084b93b54e66f9047d342232314cd25 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Feb 2022 21:42:45 +0100 Subject: uaccess: remove CONFIG_SET_FS There are no remaining callers of set_fs(), so CONFIG_SET_FS can be removed globally, along with the thread_info field and any references to it. This turns access_ok() into a cheaper check against TASK_SIZE_MAX. As CONFIG_SET_FS is now gone, drop all remaining references to set_fs()/get_fs(), mm_segment_t, user_addr_max() and uaccess_kernel(). Acked-by: Sam Ravnborg # for sparc32 changes Acked-by: "Eric W. Biederman" Tested-by: Sergey Matyukevich # for arc changes Acked-by: Stafford Horne # [openrisc, asm-generic] Acked-by: Dinh Nguyen Signed-off-by: Arnd Bergmann --- include/asm-generic/access_ok.h | 14 +------------- include/asm-generic/uaccess.h | 25 +------------------------ include/linux/syscalls.h | 4 ---- include/linux/uaccess.h | 33 --------------------------------- include/rdma/ib.h | 2 +- 5 files changed, 3 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/asm-generic/access_ok.h b/include/asm-generic/access_ok.h index d38cc5dad65b..2866ae61b1cd 100644 --- a/include/asm-generic/access_ok.h +++ b/include/asm-generic/access_ok.h @@ -16,18 +16,6 @@ #define TASK_SIZE_MAX TASK_SIZE #endif -#ifndef uaccess_kernel -#ifdef CONFIG_SET_FS -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -#else -#define uaccess_kernel() (0) -#endif -#endif - -#ifndef user_addr_max -#define user_addr_max() (uaccess_kernel() ? 
~0UL : TASK_SIZE_MAX) -#endif - #ifndef __access_ok /* * 'size' is a compile-time constant for most callers, so optimize for @@ -42,7 +30,7 @@ */ static inline int __access_ok(const void __user *ptr, unsigned long size) { - unsigned long limit = user_addr_max(); + unsigned long limit = TASK_SIZE_MAX; unsigned long addr = (unsigned long)ptr; if (IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) || diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index ebc685dc8d74..a5be9e61a2a2 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -8,6 +8,7 @@ * address space, e.g. all NOMMU machines. */ #include +#include #ifdef CONFIG_UACCESS_MEMCPY #include @@ -94,30 +95,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) #define INLINE_COPY_TO_USER #endif /* CONFIG_UACCESS_MEMCPY */ -#ifdef CONFIG_SET_FS -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#ifndef KERNEL_DS -#define KERNEL_DS MAKE_MM_SEG(~0UL) -#endif - -#ifndef USER_DS -#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) -#endif - -#ifndef get_fs -#define get_fs() (current_thread_info()->addr_limit) - -static inline void set_fs(mm_segment_t fs) -{ - current_thread_info()->addr_limit = fs; -} -#endif - -#endif /* CONFIG_SET_FS */ - -#include - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 819c0cb00b6d..a34b0f9a9972 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -290,10 +290,6 @@ static inline void addr_limit_user_check(void) return; #endif - if (CHECK_DATA_CORRUPTION(uaccess_kernel(), - "Invalid address limit on user-mode return")) - force_sig(SIGKILL); - #ifdef TIF_FSCHECK clear_thread_flag(TIF_FSCHECK); #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2c31667e62e0..2421a41f3a8e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -10,39 +10,6 @@ #include -#ifdef CONFIG_SET_FS -/* - * Force the uaccess routines to be wired up for actual userspace access, - * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone - * using force_uaccess_end below. - */ -static inline mm_segment_t force_uaccess_begin(void) -{ - mm_segment_t fs = get_fs(); - - set_fs(USER_DS); - return fs; -} - -static inline void force_uaccess_end(mm_segment_t oldfs) -{ - set_fs(oldfs); -} -#else /* CONFIG_SET_FS */ -typedef struct { - /* empty dummy */ -} mm_segment_t; - -static inline mm_segment_t force_uaccess_begin(void) -{ - return (mm_segment_t) { }; -} - -static inline void force_uaccess_end(mm_segment_t oldfs) -{ -} -#endif /* CONFIG_SET_FS */ - /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 83139b9ce409..f7c185ff7a11 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -75,7 +75,7 @@ struct sockaddr_ib { */ static inline bool ib_safe_file_access(struct file *filp) { - return filp->f_cred == current_cred() && !uaccess_kernel(); + return filp->f_cred == current_cred(); } #endif /* _RDMA_IB_H */ -- cgit v1.2.3 From 440c7317e4bb243f9464fa508d675db944972dd6 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Mon, 31 Jan 2022 14:30:46 +0100 Subject: dt-bindings: reset: Add Delta TN48M Add header for the Delta TN48M CPLD provided resets. 
Acked-by: Philipp Zabel Signed-off-by: Robert Marko Link: https://lore.kernel.org/r/20220131133049.77780-4-robert.marko@sartura.hr Signed-off-by: Greg Kroah-Hartman --- include/dt-bindings/reset/delta,tn48m-reset.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/dt-bindings/reset/delta,tn48m-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/delta,tn48m-reset.h b/include/dt-bindings/reset/delta,tn48m-reset.h new file mode 100644 index 000000000000..d4e9ed12de3e --- /dev/null +++ b/include/dt-bindings/reset/delta,tn48m-reset.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Delta TN48M CPLD GPIO driver + * + * Copyright (C) 2021 Sartura Ltd. + * + * Author: Robert Marko + */ + +#ifndef _DT_BINDINGS_RESET_TN48M_H +#define _DT_BINDINGS_RESET_TN48M_H + +#define CPU_88F7040_RESET 0 +#define CPU_88F6820_RESET 1 +#define MAC_98DX3265_RESET 2 +#define PHY_88E1680_RESET 3 +#define PHY_88E1512_RESET 4 +#define POE_RESET 5 + +#endif /* _DT_BINDINGS_RESET_TN48M_H */ -- cgit v1.2.3 From 2c861b73a23b1237bdd054b6023bb27f6747c2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 19 Feb 2022 16:28:13 +0100 Subject: math64: New DIV_U64_ROUND_CLOSEST helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide DIV_U64_ROUND_CLOSEST helper which uses div_u64 to perform division rounded to the closest integer using unsigned 64bit dividend and unsigned 32bit divisor. Reviewed-by: Marek Behún Signed-off-by: Pali Rohár Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20220219152818.4319-2-kabel@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/math64.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/math64.h b/include/linux/math64.h index 2928f03d6d46..a14f40de1dca 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -300,6 +300,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u64 _tmp = (divisor); div64_u64((dividend) + _tmp / 2, _tmp); }) +/* + * DIV_U64_ROUND_CLOSEST - unsigned 64bit divide with 32bit divisor rounded to nearest integer + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 32bit divisor + * + * Divide unsigned 64bit dividend by unsigned 32bit divisor + * and round to closest integer. + * + * Return: dividend / divisor rounded to nearest integer + */ +#define DIV_U64_ROUND_CLOSEST(dividend, divisor) \ + ({ u32 _tmp = (divisor); div_u64((u64)(dividend) + _tmp / 2, _tmp); }) + /* * DIV_S64_ROUND_CLOSEST - signed 64bit divide with 32bit divisor rounded to nearest integer * @dividend: signed 64bit dividend -- cgit v1.2.3 From 28a3f0601727d521a1c6cce62ecbcb7402a9e4f5 Mon Sep 17 00:00:00 2001 From: Toms Atteka Date: Wed, 23 Feb 2022 16:54:09 -0800 Subject: net: openvswitch: IPv6: Add IPv6 extension header support This change adds a new OpenFlow field OFPXMT_OFB_IPV6_EXTHDR and packets can be filtered using ipv6_ext flag. Signed-off-by: Toms Atteka Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 150bcff49b1c..9d1710f20505 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,6 +351,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -430,6 +431,11 @@ struct ovs_key_ipv6 { __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; +/* separate structure to support backward compatibility with older user space */ +struct ovs_key_ipv6_exthdrs { + __u16 hdrs; +}; + struct ovs_key_tcp { __be16 tcp_src; __be16 tcp_dst; -- cgit v1.2.3 From 085a884434f3e3b08349a0ba0904f9f561739d57 Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Wed, 23 Feb 2022 08:49:08 -0600 Subject: firmware: stratix10-svc: extend SVC driver to get the firmware version Extend Intel service layer driver to get the firmware version running at FPGA device. Therefore FPGA manager driver, one of Intel service layer driver's client, can decide whether to handle the newly added bitstream authentication function based on the retrieved firmware version. Link: https://lore.kernel.org/lkml/1617114785-22211-2-git-send-email-richard.gong@linux.intel.com Acked-by: Moritz Fischr Signed-off-by: Richard Gong Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220223144908.399522-2-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-smc.h | 21 +++++++++++++++++++-- include/linux/firmware/intel/stratix10-svc-client.h | 4 ++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index c3e5ab014caf..aad497a9ad8b 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -321,8 +321,6 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_ECC_DBE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) -#endif - /** * Request INTEL_SIP_SMC_RSU_NOTIFY * @@ -404,3 +402,22 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_FUNCID_RSU_MAX_RETRY 18 #define INTEL_SIP_SMC_RSU_MAX_RETRY \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_MAX_RETRY) + +/** + * Request INTEL_SIP_SMC_FIRMWARE_VERSION + * + * Sync call used to query the version of running firmware + * + * Call register usage: + * a0 INTEL_SIP_SMC_FIRMWARE_VERSION + * a1-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_STATUS_ERROR + * a1 running firmware version + */ +#define INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION 31 +#define INTEL_SIP_SMC_FIRMWARE_VERSION \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION) + +#endif diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 19781b0f6429..18c1841fdb1f 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -104,6 +104,9 @@ struct stratix10_svc_chan; * * @COMMAND_RSU_DCMF_VERSION: query firmware for the 
DCMF version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR + * + * @COMMAND_FIRMWARE_VERSION: query running firmware version, return status + * is SVC_STATUS_OK or SVC_STATUS_ERROR */ enum stratix10_svc_command_code { COMMAND_NOOP = 0, @@ -117,6 +120,7 @@ enum stratix10_svc_command_code { COMMAND_RSU_RETRY, COMMAND_RSU_MAX_RETRY, COMMAND_RSU_DCMF_VERSION, + COMMAND_FIRMWARE_VERSION, }; /** -- cgit v1.2.3 From f1d0821bf37ba3cecee0fd1e9ae72a943a69d01d Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Wed, 9 Feb 2022 00:27:07 -0800 Subject: firmware: xilinx: Add support for runtime features Add support for runtime features by using an IOCTL call. The features can be enabled or disabled from the firmware as well as the features can be configured at runtime by querying IOCTL_SET_FEATURE_CONFIG id. Similarly, the user can get the configured values of features by querying IOCTL_GET_FEATURE_CONFIG id. Acked-by: Michal Simek Signed-off-by: Ronak Jain Link: https://lore.kernel.org/r/20220209082709.32378-2-ronak.jain@xilinx.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-zynqmp.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 907cb01890cf..cf557fbeb8c7 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -143,6 +143,9 @@ enum pm_ioctl_id { IOCTL_OSPI_MUX_SELECT = 21, /* Register SGI to ATF */ IOCTL_REGISTER_SGI = 25, + /* Runtime feature configuration */ + IOCTL_SET_FEATURE_CONFIG = 26, + IOCTL_GET_FEATURE_CONFIG = 27, }; enum pm_query_id { @@ -376,6 +379,14 @@ enum ospi_mux_select_type { PM_OSPI_MUX_SEL_LINEAR = 1, }; +enum pm_feature_config_id { + PM_FEATURE_INVALID = 0, + PM_FEATURE_OVERTEMP_STATUS = 1, + PM_FEATURE_OVERTEMP_VALUE = 2, + PM_FEATURE_EXTWDT_STATUS = 3, + PM_FEATURE_EXTWDT_VALUE = 4, +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID @@ -447,6 +458,8 @@ int zynqmp_pm_load_pdi(const u32 src, const u64 address); int zynqmp_pm_register_notifier(const u32 node, const u32 event, const u32 wake, const u32 enable); int zynqmp_pm_feature(const u32 api_id); +int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value); +int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -689,6 +702,18 @@ static inline int zynqmp_pm_feature(const u32 api_id) { return -ENODEV; } + +static inline int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, + u32 value) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, + u32 *payload) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3 From 2502960fba7e94e090112069694365295c32ccc5 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 14 Feb 2022 14:07:57 +0800 Subject: component: Add common helper for compare/release functions The component requires the compare/release functions, there are so many copies in current kernel. Just define four common helpers for them. Cc: Greg Kroah-Hartman Cc: "Rafael J. 
Wysocki" Signed-off-by: Yong Wu Link: https://lore.kernel.org/r/20220214060819.7334-2-yong.wu@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/component.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/component.h b/include/linux/component.h index 7012569c6546..df4aa75c9e7c 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -82,6 +82,12 @@ struct component_master_ops { void (*unbind)(struct device *master); }; +/* A set helper functions for component compare/release */ +int component_compare_of(struct device *dev, void *data); +void component_release_of(struct device *dev, void *data); +int component_compare_dev(struct device *dev, void *data); +int component_compare_dev_name(struct device *dev, void *data); + void component_master_del(struct device *, const struct component_master_ops *); -- cgit v1.2.3 From dc3005703f8cd893d325081c20b400e08377d9bb Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 13 Jan 2022 16:48:51 +0200 Subject: ARM: at91: ddr: remove CONFIG_SOC_SAMA7 dependency Remove CONFIG_SOC_SAMA7 dependency to avoid having #ifdef preprocessor directives in driver code (arch/arm/mach-at91/pm.c). This prepares the code for next commits. Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220113144900.906370-2-claudiu.beznea@microchip.com --- include/soc/at91/sama7-ddr.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index f6542584ca13..13b47e26cdbe 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -11,8 +11,6 @@ #ifndef __SAMA7_DDR_H__ #define __SAMA7_DDR_H__ -#ifdef CONFIG_SOC_SAMA7 - /* DDR3PHY */ #define DDR3PHY_PIR (0x04) /* DDR3PHY PHY Initialization Register */ #define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */ @@ -75,6 +73,4 @@ #define UDDRC_PCTRL_3 (0x6A0) /* UDDRC Port 3 Control Register */ #define UDDRC_PCTRL_4 (0x750) /* UDDRC Port 4 Control Register */ -#endif /* CONFIG_SOC_SAMA7 */ - #endif /* __SAMA7_DDR_H__ */ -- cgit v1.2.3 From 55614e682a2c9fac12a0f121b43b93ff4915d0e6 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 13 Jan 2022 16:48:52 +0200 Subject: ARM: at91: ddr: align macro definitions Align all macro definitions. 
Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220113144900.906370-3-claudiu.beznea@microchip.com --- include/soc/at91/sama7-ddr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index 13b47e26cdbe..817b360efbb8 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -13,11 +13,11 @@ /* DDR3PHY */ #define DDR3PHY_PIR (0x04) /* DDR3PHY PHY Initialization Register */ -#define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */ +#define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */ #define DDR3PHY_PIR_ITMSRST (1 << 4) /* Interface Timing Module Soft Reset */ -#define DDR3PHY_PIR_DLLLOCK (1 << 2) /* DLL Lock */ +#define DDR3PHY_PIR_DLLLOCK (1 << 2) /* DLL Lock */ #define DDR3PHY_PIR_DLLSRST (1 << 1) /* DLL Soft Rest */ -#define DDR3PHY_PIR_INIT (1 << 0) /* Initialization Trigger */ +#define DDR3PHY_PIR_INIT (1 << 0) /* Initialization Trigger */ #define DDR3PHY_PGCR (0x08) /* DDR3PHY PHY General Configuration Register */ #define DDR3PHY_PGCR_CKDV1 (1 << 13) /* CK# Disable Value */ @@ -65,7 +65,7 @@ #define UDDRC_SWSTAT_SW_DONE_ACK (1 << 0) /* Register programming done */ #define UDDRC_PSTAT (0x3FC) /* UDDRC Port Status Register */ -#define UDDRC_PSTAT_ALL_PORTS (0x1F001F) /* Read + writes outstanding transactions on all ports */ +#define UDDRC_PSTAT_ALL_PORTS (0x1F001F) /* Read + writes outstanding transactions on all ports */ #define UDDRC_PCTRL_0 (0x490) /* UDDRC Port 0 Control Register */ #define UDDRC_PCTRL_1 (0x540) /* UDDRC Port 1 Control Register */ -- cgit v1.2.3 From 9a0775c9cd3d89d1fe957a137131681a33f5736b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 13 Jan 2022 16:48:53 +0200 Subject: ARM: at91: ddr: fix typo to align with datasheet naming Fix typo on UDDRC_PWRCTL.SELFREF_SW bitmask to align with datasheet naming. Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220113144900.906370-4-claudiu.beznea@microchip.com --- include/soc/at91/sama7-ddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index 817b360efbb8..fee1b11bddca 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -53,7 +53,7 @@ #define UDDRC_STAT_OPMODE_MSK (0x7 << 0) /* Operating mode mask */ #define UDDRC_PWRCTL (0x30) /* UDDRC Low Power Control Register */ -#define UDDRC_PWRCTRL_SELFREF_SW (1 << 5) /* Software self-refresh */ +#define UDDRC_PWRCTL_SELFREF_SW (1 << 5) /* Software self-refresh */ #define UDDRC_DFIMISC (0x1B0) /* UDDRC DFI Miscellaneous Control Register */ #define UDDRC_DFIMISC_DFI_INIT_COMPLETE_EN (1 << 0) /* PHY initialization complete enable signal */ -- cgit v1.2.3 From 9584e7263e9ebcd94b184dc3efc847355a624220 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 13 Jan 2022 16:48:54 +0200 Subject: ARM: at91: PM: add cpu idle support for sama7g5 Add CPU idle support for SAMA7G5. Support will make use of PMC_CPU_RATIO register to divide the CPU clock by 16 before switching it to idle and use automatic self-refresh option of DDR controller. 
Signed-off-by: Claudiu Beznea Acked-by: Stephen Boyd Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220113144900.906370-5-claudiu.beznea@microchip.com --- include/linux/clk/at91_pmc.h | 4 ++++ include/soc/at91/sama7-ddr.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index ccb3f034bfa9..3484309b59bf 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -78,6 +78,10 @@ #define AT91_PMC_MAINRDY (1 << 16) /* Main Clock Ready */ #define AT91_CKGR_PLLAR 0x28 /* PLL A Register */ + +#define AT91_PMC_RATIO 0x2c /* Processor clock ratio register [SAMA7G5 only] */ +#define AT91_PMC_RATIO_RATIO (0xf) /* CPU clock ratio. */ + #define AT91_CKGR_PLLBR 0x2c /* PLL B Register */ #define AT91_PMC_DIV (0xff << 0) /* Divider */ #define AT91_PMC_PLLCOUNT (0x3f << 8) /* PLL Counter */ diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index fee1b11bddca..9e17247474fa 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -53,6 +53,7 @@ #define UDDRC_STAT_OPMODE_MSK (0x7 << 0) /* Operating mode mask */ #define UDDRC_PWRCTL (0x30) /* UDDRC Low Power Control Register */ +#define UDDRC_PWRCTL_SELFREF_EN (1 << 0) /* Automatic self-refresh */ #define UDDRC_PWRCTL_SELFREF_SW (1 << 5) /* Software self-refresh */ #define UDDRC_DFIMISC (0x1B0) /* UDDRC DFI Miscellaneous Control Register */ -- cgit v1.2.3 From d0054a470c33902f5ae88835ed8a8ecc3cf8faa4 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 17 Feb 2022 10:13:50 +0000 Subject: soc: add microchip polarfire soc system controller This driver provides an interface for other drivers to access the functions of the system controller on the Microchip PolarFire SoC. Signed-off-by: Conor Dooley Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220217101349.2374873-2-conor.dooley@microchip.com --- include/soc/microchip/mpfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/microchip/mpfs.h b/include/soc/microchip/mpfs.h index 2b64c95f3be5..6466515262bd 100644 --- a/include/soc/microchip/mpfs.h +++ b/include/soc/microchip/mpfs.h @@ -34,9 +34,9 @@ struct mpfs_mss_response { #if IS_ENABLED(CONFIG_POLARFIRE_SOC_SYS_CTRL) -int mpfs_blocking_transaction(struct mpfs_sys_controller *mpfs_client, void *msg); +int mpfs_blocking_transaction(struct mpfs_sys_controller *mpfs_client, struct mpfs_mss_msg *msg); -struct mpfs_sys_controller *mpfs_sys_controller_get(struct device_node *mailbox_node); +struct mpfs_sys_controller *mpfs_sys_controller_get(struct device *dev); #endif /* if IS_ENABLED(CONFIG_POLARFIRE_SOC_SYS_CTRL) */ -- cgit v1.2.3 From fcfaff508b9fa9ad6d8a17d4855e3ec7382886ae Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 7 Feb 2022 05:09:51 -0800 Subject: soc/tegra: bpmp: cleanup double word in comment Remove the second 'or'. Signed-off-by: Tom Rix Signed-off-by: Thierry Reding --- include/soc/tegra/bpmp-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/tegra/bpmp-abi.h b/include/soc/tegra/bpmp-abi.h index bff99f23860c..53171e324d1c 100644 --- a/include/soc/tegra/bpmp-abi.h +++ b/include/soc/tegra/bpmp-abi.h @@ -931,7 +931,7 @@ enum mrq_reset_commands { * @brief Request with MRQ_RESET * * Used by the sender of an #MRQ_RESET message to request BPMP to - * assert or or deassert a given reset line. + * assert or deassert a given reset line. 
*/ struct mrq_reset_request { /** @brief Reset action to perform (@ref mrq_reset_commands) */ -- cgit v1.2.3 From ba7bb663f5547ef474c98df99a97bb4a13c5715f Mon Sep 17 00:00:00 2001 From: David Dunn Date: Wed, 23 Feb 2022 22:57:41 +0000 Subject: KVM: x86: Provide per VM capability for disabling PMU virtualization Add a new capability, KVM_CAP_PMU_CAPABILITY, that takes a bitmask of settings/features to allow userspace to configure PMU virtualization on a per-VM basis. For now, support a single flag, KVM_PMU_CAP_DISABLE, to allow disabling PMU virtualization for a VM even when KVM is configured with enable_pmu=true at module level. To keep KVM simple, disallow changing a VM's PMU configuration after vCPUs have been created. Signed-off-by: David Dunn Message-Id: <20220223225743.2703915-2-daviddunn@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a02bbf8fd0f6..d2f1efc3aa35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1142,6 +1142,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1978,6 +1979,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. -- cgit v1.2.3 From 766121ba5de38a6f67980ec24a6af76c55def100 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:24 +0000 Subject: arm64/mte: Add userspace interface for enabling asymmetric mode The architecture provides an asymmetric mode for MTE where tag mismatches are checked asynchronously for stores but synchronously for loads. Allow userspace processes to select this and make it available as a default mode via the existing per-CPU sysfs interface. Since the PR_MTE_TCF_ values are a bitmask (allowing the kernel to choose between the multiple modes) and there are no free bits adjacent to the existing PR_MTE_TCF_ bits the set of bits used to specify the mode becomes disjoint. Programs using the new interface should be aware of this and programs that do not use it will not see any change in behaviour. When userspace requests two possible modes but the system default for the CPU is the third mode (eg, default is synchronous but userspace requests either asynchronous or asymmetric) the preference order is: ASYMM > ASYNC > SYNC This situation is not currently possible since there are only two modes and it is mandatory to have a system default so there could be no ambiguity and there is no ABI change. The chosen order is basically arbitrary as we do not have a clear metric for what is better here. If userspace requests specifically asymmetric mode via the prctl() and the system does not support it then we will return an error; this mirrors how we handle the case where userspace enables MTE on a system that does not support MTE at all and the behaviour that will be seen if running on an older kernel that does not support userspace use of asymmetric mode. Attempts to set asymmetric mode as the default mode will result in an error if the system does not support it.
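For reference, opting a thread into the new mode from userspace looks roughly like the following (illustrative only; the tag inclusion mask below is an arbitrary choice):

	#include <linux/prctl.h>
	#include <sys/prctl.h>

	/* Enable tagged addresses with asymmetric tag checking; prctl()
	 * fails if the kernel or CPU does not support PR_MTE_TCF_ASYMM.
	 */
	static int enable_mte_asymm(void)
	{
		unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYMM |
				     (0xfffeUL << PR_MTE_TAG_SHIFT);

		return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
	}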
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-5-broonie@kernel.org Signed-off-by: Will Deacon --- include/uapi/linux/prctl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..4ae2b21e4066 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,7 +238,9 @@ struct prctl_mm_map { # define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) -# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +# define PR_MTE_TCF_ASYMM (1UL << 19) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \ + PR_MTE_TCF_ASYMM) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) -- cgit v1.2.3 From 8b4195cd6dc3f1f0ab457d23d21e9f72fde0760a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 23 Feb 2022 14:43:47 +0100 Subject: mtd: spi-nor: move all xilinx specifics into xilinx.c Mechanically move all the xilinx functions to its own module. Then register the new flash specific ready() function. Signed-off-by: Michael Walle Signed-off-by: Tudor Ambarus Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/20220223134358.1914798-22-michael@walle.cc --- include/linux/mtd/spi-nor.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index fc90fce26e33..b44b05a6f934 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -86,15 +86,6 @@ #define SPINOR_OP_BP 0x02 /* Byte program */ #define SPINOR_OP_AAI_WP 0xad /* Auto address increment word program */ -/* Used for S3AN flashes only */ -#define SPINOR_OP_XSE 0x50 /* Sector erase */ -#define SPINOR_OP_XPP 0x82 /* Page program */ -#define SPINOR_OP_XRDSR 0xd7 /* Read status register */ - -#define XSR_PAGESIZE BIT(0) /* Page size in Po2 or Linear */ -#define XSR_RDY BIT(7) /* Ready */ - - /* Used for Macronix and Winbond flashes. */ #define SPINOR_OP_EN4B 0xb7 /* Enter 4-byte mode */ #define SPINOR_OP_EX4B 0xe9 /* Exit 4-byte mode */ -- cgit v1.2.3 From c770abe52d81089a8b8ecd1fe42722e29bbab5f5 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 23 Feb 2022 14:43:50 +0100 Subject: mtd: spi-nor: move all micron-st specifics into micron-st.c The flag status register is only available on micron flashes. Move all the functions around that into the micron module. This is almost a mechanical move except for the spi_nor_fsr_ready() which now also checks the normal status register. Previously, this was done in spi_nor_ready(). 
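The resulting ready() helper is expected to look roughly like this (a sketch, not the verbatim moved code):

	/* The flash is only considered ready once both the plain status
	 * register and the Micron flag status register report ready.
	 */
	static int micron_st_nor_ready(struct spi_nor *nor)
	{
		int sr_ready, fsr_ready;

		sr_ready = spi_nor_sr_ready(nor);
		if (sr_ready < 0)
			return sr_ready;

		fsr_ready = spi_nor_fsr_ready(nor);
		if (fsr_ready < 0)
			return fsr_ready;

		return sr_ready && fsr_ready;
	}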
Signed-off-by: Michael Walle Signed-off-by: Tudor Ambarus Tested-by: Pratyush Yadav # on mt35xu512aba, s28hs512t Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/20220223134358.1914798-25-michael@walle.cc --- include/linux/mtd/spi-nor.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index b44b05a6f934..4622251a79ff 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -47,8 +47,6 @@ #define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */ #define SPINOR_OP_RDSFDP 0x5a /* Read SFDP */ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ -#define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ -#define SPINOR_OP_CLFSR 0x50 /* Clear flag status register */ #define SPINOR_OP_RDEAR 0xc8 /* Read Extended Address Register */ #define SPINOR_OP_WREAR 0xc5 /* Write Extended Address Register */ #define SPINOR_OP_SRSTEN 0x66 /* Software Reset Enable */ @@ -126,12 +124,6 @@ /* Enhanced Volatile Configuration Register bits */ #define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ -/* Flag Status Register bits */ -#define FSR_READY BIT(7) /* Device status, 0 = Busy, 1 = Ready */ -#define FSR_E_ERR BIT(5) /* Erase operation status */ -#define FSR_P_ERR BIT(4) /* Program operation status */ -#define FSR_PT_ERR BIT(1) /* Protection error bit */ - /* Status Register 2 bits. */ #define SR2_QUAD_EN_BIT1 BIT(1) #define SR2_LB1 BIT(3) /* Security Register Lock Bit 1 */ -- cgit v1.2.3 From 837d5181beef068c16bb8424c2c1571a7d5d7966 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 23 Feb 2022 14:43:54 +0100 Subject: mtd: spi-nor: move all spansion specifics into spansion.c The clear status register flags is only available on spansion flashes. Move all the functions around that into the spanion module. Signed-off-by: Michael Walle Signed-off-by: Tudor Ambarus Tested-by: Pratyush Yadav # on mt35xu512aba, s28hs512t Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/20220223134358.1914798-29-michael@walle.cc --- include/linux/mtd/spi-nor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 4622251a79ff..5e25a7b75ae2 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -90,7 +90,6 @@ /* Used for Spansion flashes only. */ #define SPINOR_OP_BRWR 0x17 /* Bank register write */ -#define SPINOR_OP_CLSR 0x30 /* Clear status register 1 */ /* Used for Micron flashes only. */ #define SPINOR_OP_RD_EVCR 0x65 /* Read EVCR register */ -- cgit v1.2.3 From bc82c38a6933aab308387d4aca47e0a05de7b553 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Feb 2022 08:10:18 +0100 Subject: tracing: Uninline trace_trigger_soft_disabled() partly On a powerpc32 build with CONFIG_CC_OPTIMISE_FOR_SIZE, the inline keyword is not honored and trace_trigger_soft_disabled() appears approx 50 times in vmlinux. Adding -Winline to the build, the following message appears: ./include/linux/trace_events.h:712:1: error: inlining failed in call to 'trace_trigger_soft_disabled': call is unlikely and code size would grow [-Werror=inline] That function is rather big for an inlined function: c003df60 : c003df60: 94 21 ff f0 stwu r1,-16(r1) c003df64: 7c 08 02 a6 mflr r0 c003df68: 90 01 00 14 stw r0,20(r1) c003df6c: bf c1 00 08 stmw r30,8(r1) c003df70: 83 e3 00 24 lwz r31,36(r3) c003df74: 73 e9 01 00 andi. 
r9,r31,256 c003df78: 41 82 00 10 beq c003df88 c003df7c: 38 60 00 00 li r3,0 c003df80: 39 61 00 10 addi r11,r1,16 c003df84: 4b fd 60 ac b c0014030 <_rest32gpr_30_x> c003df88: 73 e9 00 80 andi. r9,r31,128 c003df8c: 7c 7e 1b 78 mr r30,r3 c003df90: 41 a2 00 14 beq c003dfa4 c003df94: 38 c0 00 00 li r6,0 c003df98: 38 a0 00 00 li r5,0 c003df9c: 38 80 00 00 li r4,0 c003dfa0: 48 05 c5 f1 bl c009a590 c003dfa4: 73 e9 00 40 andi. r9,r31,64 c003dfa8: 40 82 00 28 bne c003dfd0 c003dfac: 73 ff 02 00 andi. r31,r31,512 c003dfb0: 41 82 ff cc beq c003df7c c003dfb4: 80 01 00 14 lwz r0,20(r1) c003dfb8: 83 e1 00 0c lwz r31,12(r1) c003dfbc: 7f c3 f3 78 mr r3,r30 c003dfc0: 83 c1 00 08 lwz r30,8(r1) c003dfc4: 7c 08 03 a6 mtlr r0 c003dfc8: 38 21 00 10 addi r1,r1,16 c003dfcc: 48 05 6f 6c b c0094f38 c003dfd0: 38 60 00 01 li r3,1 c003dfd4: 4b ff ff ac b c003df80 However it is located in a hot path so inlining it is important. But forcing inlining of the entire function by using __always_inline leads to increasing the text size by approx 20 kbytes. Instead, split the fonction in two parts, one part with the likely fast path, flagged __always_inline, and a second part out of line. With this change, on a powerpc32 with CONFIG_CC_OPTIMISE_FOR_SIZE vmlinux text increases by only 1,4 kbytes, which is partly compensated by a decrease of vmlinux data by 7 kbytes. On ppc64_defconfig which has CONFIG_CC_OPTIMISE_FOR_SPEED, this change reduces vmlinux text by more than 30 kbytes. Link: https://lkml.kernel.org/r/69ce0986a52d026d381d612801d978aa4f977460.1644563295.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..dcea51fb60e2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -699,6 +699,8 @@ event_triggers_post_call(struct trace_event_file *file, bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); +bool __trace_trigger_soft_disabled(struct trace_event_file *file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -708,20 +710,20 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); * triggers that require testing the fields, it will return true, * otherwise false. */ -static inline bool +static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { - if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL, NULL); - if (eflags & EVENT_FILE_FL_SOFT_DISABLED) - return true; - if (eflags & EVENT_FILE_FL_PID_FILTER) - return trace_event_ignore_this_pid(file); - } - return false; + if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | + EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) + return false; + + return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS -- cgit v1.2.3 From 8786fde8421ce755a842051f9528674a1b1f0b9a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 22 Jan 2022 20:54:52 +0000 Subject: Convert NFS from readpages to readahead NFS is one of the last two users of the deprecated ->readpages aop. 
This conversion looks straightforward, but I have only compile-tested it. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 68f81d8d36de..333ea05e2531 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -601,8 +601,7 @@ nfs_have_writebacks(struct inode *inode) * linux/fs/nfs/read.c */ extern int nfs_readpage(struct file *, struct page *); -extern int nfs_readpages(struct file *, struct address_space *, - struct list_head *, unsigned); +void nfs_readahead(struct readahead_control *); /* * inline functions -- cgit v1.2.3 From 43245eca6e670ebf65908b549641c1460a9cc944 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 2 Feb 2022 17:55:02 -0500 Subject: NFSv4.1 support for NFS4_RESULT_PRESERVER_UNLINKED In 4.1+, the server is allowed to set a flag NFS4_RESULT_PRESERVE_UNLINKED in reply to the OPEN, that tells the client that it does not need to do a silly rename of an opened file when it's being removed. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + include/uapi/linux/nfs4.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 333ea05e2531..0e79dbbc759a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -277,6 +277,7 @@ struct nfs4_copy_state { #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ +#define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if removed while open */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 800bb0ffa6e6..1d2043708bf1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -45,6 +45,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F -- cgit v1.2.3 From 88a6099fc3274a27814d26dd688fdc5cd7a480ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 Feb 2022 13:22:48 -0500 Subject: NFS: Replace last uses of NFS_INO_REVAL_PAGECACHE Now that we have more fine grained attribute revalidation, let's just get rid of NFS_INO_REVAL_PAGECACHE. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0e79dbbc759a..ce3128e4bffa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -356,11 +356,9 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME; + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_SIZE; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); -- cgit v1.2.3 From 41e97b7f8a15d15da03ca15e6ff7b9b7ab7f588c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 Feb 2022 13:26:19 -0500 Subject: NFS: Remove unused flag NFS_INO_REVAL_PAGECACHE Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ce3128e4bffa..72a732a5103c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -247,7 +247,6 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ #define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ #define NFS_INO_INVALID_ACL BIT(4) /* cached acls are invalid */ -#define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ #define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ #define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ #define NFS_INO_INVALID_CHANGE BIT(8) /* cached change is invalid */ -- cgit v1.2.3 From 891b7023010cc0d62d814619b1893745d7613f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 5 Feb 2022 11:36:09 +0100 Subject: clk: mux: Declare u32 *table parameter as const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The elements of the table are never modified in clk-mux.c. To make this clear to clock drivers, declare the parameter as const u32 *table. 
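For example, a clock provider can now keep its selector table in rodata and pass it straight to the mux registration helpers (hypothetical driver names, matching the prototype in the hunk below):

	static const u32 foo_mux_table[] = { 0, 1, 3 };
	static const char * const foo_parents[] = { "osc", "pll_a", "pll_b" };
	static DEFINE_SPINLOCK(foo_mux_lock);

	/* In the driver's probe(): */
	clk = clk_register_mux_table(dev, "foo_mux", foo_parents,
				     ARRAY_SIZE(foo_parents), 0,
				     base + FOO_MUX_REG, 0, 0x3, 0,
				     foo_mux_table, &foo_mux_lock);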
Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20220205103613.1216218-4-j.neuschaefer@gmx.net Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2faa6f7aa8a8..27be57528874 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -888,7 +888,7 @@ void clk_hw_unregister_divider(struct clk_hw *hw); struct clk_mux { struct clk_hw hw; void __iomem *reg; - u32 *table; + const u32 *table; u32 mask; u8 shift; u8 flags; @@ -913,18 +913,18 @@ struct clk_hw *__clk_hw_register_mux(struct device *dev, struct device_node *np, const struct clk_hw **parent_hws, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, - u8 clk_mux_flags, u32 *table, spinlock_t *lock); + u8 clk_mux_flags, const u32 *table, spinlock_t *lock); struct clk_hw *__devm_clk_hw_register_mux(struct device *dev, struct device_node *np, const char *name, u8 num_parents, const char * const *parent_names, const struct clk_hw **parent_hws, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, - u8 clk_mux_flags, u32 *table, spinlock_t *lock); + u8 clk_mux_flags, const u32 *table, spinlock_t *lock); struct clk *clk_register_mux_table(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, - u8 clk_mux_flags, u32 *table, spinlock_t *lock); + u8 clk_mux_flags, const u32 *table, spinlock_t *lock); #define clk_register_mux(dev, name, parent_names, num_parents, flags, reg, \ shift, width, clk_mux_flags, lock) \ @@ -962,9 +962,9 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (shift), BIT((width)) - 1, (clk_mux_flags), \ NULL, (lock)) -int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags, +int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags, unsigned int val); -unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index); +unsigned int clk_mux_index_to_val(const u32 *table, unsigned int flags, u8 index); void clk_unregister_mux(struct clk *clk); void clk_hw_unregister_mux(struct clk_hw *hw); -- cgit v1.2.3 From f7f497cb702462e8505ff3d8d4e7722ad95626a1 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:30:35 +0530 Subject: jbd2: kill t_handle_lock transaction spinlock This patch kills t_handle_lock transaction spinlock completely from jbd2. To explain the reasoning, currently there were three sites at which this spinlock was used. 1. jbd2_journal_wait_updates() a. Based on careful code review it can be seen that, we don't need this lock here. This is since we wait for any currently ongoing updates based on a atomic variable t_updates. And we anyway don't take any t_handle_lock while in stop_this_handle(). i.e. write_lock(&journal->j_state_lock() jbd2_journal_wait_updates() stop_this_handle() while (atomic_read(txn->t_updates) { | DEFINE_WAIT(wait); | prepare_to_wait(); | if (atomic_read(txn->t_updates) if (atomic_dec_and_test(txn->t_updates)) write_unlock(&journal->j_state_lock); schedule(); wake_up() write_lock(&journal->j_state_lock); finish_wait(); } txn->t_state = T_COMMIT write_unlock(&journal->j_state_lock); b. 
Also note that between atomic_inc(&txn->t_updates) in start_this_handle() and jbd2_journal_wait_updates(), the synchronization happens via read_lock(journal->j_state_lock) in start_this_handle(); 2. jbd2_journal_extend() a. jbd2_journal_extend() is called with the handle of each process from task_struct. So no lock required in updating member fields of handle_t b. For member fields of h_transaction, all updates happens only via atomic APIs (which is also within read_lock()). So, no need of this transaction spinlock. 3. update_t_max_wait() Based on Jan suggestion, this can be carefully removed using atomic cmpxchg API. Note that there can be several processes which are waiting for a new transaction to be allocated and started. For doing this only one process will succeed in taking write_lock() and allocating a new txn. After that all of the process will be updating the t_max_wait (max transaction wait time). This can be done via below method w/o taking any locks using atomic cmpxchg. For more details refer [1] new = get_new_val(); old = READ_ONCE(ptr->max_val); while (old < new) old = cmpxchg(&ptr->max_val, old, new); [1]: https://lwn.net/Articles/849237/ Suggested-by: Jan Kara Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9c3ada74ffb1..a787872e1e86 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -554,9 +554,6 @@ struct transaction_chp_stats_s { * ->j_list_lock * * j_state_lock - * ->t_handle_lock - * - * j_state_lock * ->j_list_lock (journal_unmap_buffer) * */ -- cgit v1.2.3 From 9e8d5470325f25bed7d33f9faaae6d5e4f313650 Mon Sep 17 00:00:00 2001 From: Hammer Hsieh Date: Tue, 22 Feb 2022 17:36:03 +0800 Subject: serial: sunplus-uart: Add Sunplus SoC UART Driver Add Sunplus SoC UART Driver. SP7021 UART block contains 5 UARTs. There are UART0~4 that supported in SP7021, the features list as below. Support Full-duplex communication. Support data packet length configurable. Support stop bit number configurable. Support force break condition. Support baud rate configurable. Support error detection and report. Support RXD Noise Rejection Vote configurable. UART0 pinout only support TX/RX two pins. UART1 to UART4 pinout support TX/RX/CTS/RTS four pins. Normally UART0 used for kernel console, also can be used for normal uart. Command line set "console=ttySUP0,115200", SUP means Sunplus Uart Port. UART driver probe will create path named "/dev/ttySUPx". 
https://sunplus.atlassian.net/wiki/spaces/doc/pages/1873412290/13.+Universal+Asynchronous+Receiver+Transmitter+UART Signed-off-by: Hammer Hsieh Link: https://lore.kernel.org/r/1645522563-17183-3-git-send-email-hammerh0314@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 8885e69178bd..6faf502b7860 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -277,4 +277,7 @@ /* Freescale LINFlexD UART */ #define PORT_LINFLEXUART 122 +/* Sunplus UART */ +#define PORT_SUNPLUS 123 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From c2faf737abfb10f88f2d2612d573e9edc3c42c37 Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Fri, 11 Feb 2022 15:10:36 +0100 Subject: tty: Reserve ldisc 29 for development purposes It's handy to have an ldisc number free for out-of-tree testing. This way, a new ldisc can be developed on any running system, without having to recompile the kernel just to define a new number. This is the highest number (and also the last one) available under the old numbering scheme, so let's reserve it before it's too late. From now on, every new ldisc upstreamed will have to increment NR_LDISCS in lockstep with its addition to the table in tty.h. Signed-off-by: Max Staudt Link: https://lore.kernel.org/r/20220211141036.6403-1-max@enpas.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/tty.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index a58deb3061eb..9d0f06bfbac3 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -6,8 +6,6 @@ * 'tty.h' defines some structures used by tty_io.c and some defines. */ -#define NR_LDISCS 30 - /* line disciplines */ #define N_TTY 0 #define N_SLIP 1 @@ -39,5 +37,9 @@ #define N_SPEAKUP 26 /* Speakup communication with synths */ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ +#define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ + +/* Always the newest line discipline + 1 */ +#define NR_LDISCS 30 #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From 5e187189ec324f78035d33a4bc123a9c4ca6f3e3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 26 Feb 2022 12:18:29 +0800 Subject: net: ip: add skb drop reasons for ip egress path Replace kfree_skb() which is used in the packet egress path of IP layer with kfree_skb_reason(). Functions that are involved include: __ip_queue_xmit() ip_finish_output() ip_mc_finish_output() ip6_output() ip6_finish_output() ip6_finish_output2() Following new drop reasons are introduced: SKB_DROP_REASON_IP_OUTNOROUTES SKB_DROP_REASON_BPF_CGROUP_EGRESS SKB_DROP_REASON_IPV6DISABLED SKB_DROP_REASON_NEIGH_CREATEFAIL Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 9 +++++++++ include/trace/events/skb.h | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 31be38078918..62b4bed1b7bc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -380,6 +380,15 @@ enum skb_drop_reason { * the ofo queue, corresponding to * LINUX_MIB_TCPOFOMERGE */ + SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by + * BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh + * entry + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 2ab7193313aa..97f8097651da 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -37,6 +37,11 @@ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ + EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ + EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ + BPF_CGROUP_EGRESS) \ + EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ + EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From a5736edda10ca2ba075606baad1dda3f2426766d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 26 Feb 2022 12:18:30 +0800 Subject: net: neigh: use kfree_skb_reason() for __neigh_event_send() Replace kfree_skb() used in __neigh_event_send() with kfree_skb_reason(). Following drop reasons are added: SKB_DROP_REASON_NEIGH_FAILED SKB_DROP_REASON_NEIGH_QUEUEFULL SKB_DROP_REASON_NEIGH_DEAD The first two reasons above should be the hot path that skb drops in neighbour subsystem. Reviewed-by: Mengen Sun Reviewed-by: Hao Peng Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ include/trace/events/skb.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 62b4bed1b7bc..d67941f78b92 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -389,6 +389,11 @@ enum skb_drop_reason { SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh * entry */ + SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh + * entry is full + */ + SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 97f8097651da..1977f301260d 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -42,6 +42,9 @@ BPF_CGROUP_EGRESS) \ EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ + EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ + EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ + EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 21ca9fb62d4688da41825e0f05d8e7e26afc69d6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:10 +0200 Subject: PCI/IOV: Add pci_iov_vf_id() to get VF index The PCI core uses the VF index internally, often called the vf_id, during the setup of the VF, eg pci_iov_add_virtfn(). 
This index is needed for device drivers that implement live migration for their internal operations that configure/control their VFs. Specifically, mlx5_vfio_pci driver that is introduced in coming patches from this series needs it and not the bus/device/function which is exposed today. Add pci_iov_vf_id() which computes the vf_id by reversing the math that was used to create the bus/device/function. Link: https://lore.kernel.org/all/20220224142024.147653-2-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/pci.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c..3d4ff7b35ad1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2166,7 +2166,7 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar); #ifdef CONFIG_PCI_IOV int pci_iov_virtfn_bus(struct pci_dev *dev, int id); int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); - +int pci_iov_vf_id(struct pci_dev *dev); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); @@ -2194,6 +2194,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) { return -ENOSYS; } + +static inline int pci_iov_vf_id(struct pci_dev *dev) +{ + return -ENOSYS; +} + static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } -- cgit v1.2.3 From a7e9f240c0da4fb73a353c603daf4beba04c6ecf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:13 +0200 Subject: PCI/IOV: Add pci_iov_get_pf_drvdata() to allow VF reaching the drvdata of a PF There are some cases where a SR-IOV VF driver will need to reach into and interact with the PF driver. This requires accessing the drvdata of the PF. Provide a function pci_iov_get_pf_drvdata() to return this PF drvdata in a safe way. Normally accessing a drvdata of a foreign struct device would be done using the device_lock() to protect against device driver probe()/remove() races. However, due to the design of pci_enable_sriov() this will result in a ABBA deadlock on the device_lock as the PF's device_lock is held during PF sriov_configure() while calling pci_enable_sriov() which in turn holds the VF's device_lock while calling VF probe(), and similarly for remove. This means the VF driver can never obtain the PF's device_lock. Instead use the implicit locking created by pci_enable/disable_sriov(). A VF driver can access its PF drvdata only while its own driver is attached, and the PF driver can control access to its own drvdata based on when it calls pci_enable/disable_sriov(). To use this API the PF driver will setup the PF drvdata in the probe() function. pci_enable_sriov() is only called from sriov_configure() which cannot happen until probe() completes, ensuring no VF races with drvdata setup. For removal, the PF driver must call pci_disable_sriov() in its remove function before destroying any of the drvdata. This ensures that all VF drivers are unbound before returning, fencing concurrent access to the drvdata. The introduction of a new function to do this access makes clear the special locking scheme and the documents the requirements on the PF/VF drivers using this. 
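A minimal usage sketch (driver and structure names here are made up; error handling trimmed):

	/* PF driver: drvdata is published in probe(), before
	 * sriov_configure() and thus pci_enable_sriov() can possibly run.
	 */
	static int foo_pf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		struct foo_pf *pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);

		if (!pf)
			return -ENOMEM;
		pci_set_drvdata(pdev, pf);
		return 0;
	}

	/* VF driver: only valid while the VF driver itself is bound. */
	static int foo_vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		struct foo_pf *pf = pci_iov_get_pf_drvdata(pdev, &foo_pf_driver);
		int vf_id = pci_iov_vf_id(pdev);

		if (IS_ERR(pf) || vf_id < 0)
			return -ENODEV;

		return foo_vf_init(pdev, pf, vf_id);
	}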
Link: https://lore.kernel.org/all/20220224142024.147653-5-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3d4ff7b35ad1..60d423d8f0c4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2167,6 +2167,7 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar); int pci_iov_virtfn_bus(struct pci_dev *dev, int id); int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); int pci_iov_vf_id(struct pci_dev *dev); +void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); @@ -2200,6 +2201,12 @@ static inline int pci_iov_vf_id(struct pci_dev *dev) return -ENOSYS; } +static inline void *pci_iov_get_pf_drvdata(struct pci_dev *dev, + struct pci_driver *pf_driver) +{ + return ERR_PTR(-EINVAL); +} + static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } -- cgit v1.2.3 From 1695b97b291e79295bf5c26cba5ecc4b443d8ac7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 24 Feb 2022 16:20:14 +0200 Subject: net/mlx5: Expose APIs to get/put the mlx5 core device Expose an API to get the mlx5 core device from a given VF PCI device if mlx5_core is its driver. Upon the get API we stay with the intf_state_mutex locked to make sure that the device can't be gone/unloaded till the caller will complete its job over the device, this expects to be for a short period of time for any flow that the lock is taken. Upon the put API we unlock the intf_state_mutex. The use case for those APIs is the migration flow of a VF over VFIO PCI. In that case the VF doesn't ride on mlx5_core, because the device is driving *two* different PCI devices, the PF owned by mlx5_core and the VF owned by the vfio driver. The mlx5_core of the PF is accessed only during the narrow window of the VF's ioctl that requires its services. This allows the PF driver to be more independent of the VF driver, so long as it doesn't reset the FW. Link: https://lore.kernel.org/all/20220224142024.147653-6-yishaih@nvidia.com Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 78655d8d13a7..319322a8ff94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1143,6 +1143,9 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id); +struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev); +void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev); + #ifdef CONFIG_MLX5_CORE_IPOIB struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, -- cgit v1.2.3 From adfdaff3d14fe819a0420d81788f7ebbcd954940 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 24 Feb 2022 16:20:15 +0200 Subject: net/mlx5: Introduce migration bits and structures Introduce migration IFC related stuff to enable migration commands. 
Link: https://lore.kernel.org/all/20220224142024.147653-7-yishaih@nvidia.com Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 147 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..b1c27409c997 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -127,6 +127,11 @@ enum { MLX5_CMD_OP_QUERY_SF_PARTITION = 0x111, MLX5_CMD_OP_ALLOC_SF = 0x113, MLX5_CMD_OP_DEALLOC_SF = 0x114, + MLX5_CMD_OP_SUSPEND_VHCA = 0x115, + MLX5_CMD_OP_RESUME_VHCA = 0x116, + MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE = 0x117, + MLX5_CMD_OP_SAVE_VHCA_STATE = 0x118, + MLX5_CMD_OP_LOAD_VHCA_STATE = 0x119, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -1757,7 +1762,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_682[0x1]; u8 log_max_sf[0x5]; u8 apu[0x1]; - u8 reserved_at_689[0x7]; + u8 reserved_at_689[0x4]; + u8 migration[0x1]; + u8 reserved_at_68e[0x2]; u8 log_min_sf_size[0x8]; u8 max_num_sf_partitions[0x8]; @@ -11519,4 +11526,142 @@ enum { MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE, }; +enum { + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR = 0x0, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER = 0x1, +}; + +struct mlx5_ifc_suspend_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_suspend_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum { + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER = 0x0, + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR = 0x1, +}; + +struct mlx5_ifc_resume_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_resume_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_vhca_migration_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_query_vhca_migration_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 required_umem_size[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_save_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; + + u8 va[0x40]; + + u8 mkey[0x20]; + + u8 size[0x20]; +}; + +struct mlx5_ifc_save_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 actual_image_size[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_load_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; + + u8 va[0x40]; + + u8 mkey[0x20]; + + u8 size[0x20]; +}; + +struct mlx5_ifc_load_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 
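Together with the mlx5_vf_get_core_dev()/mlx5_vf_put_core_dev() pair added earlier in this series, a migration driver is expected to drive these commands roughly as follows (illustrative wrapper only, not the actual vfio driver code):

	static int foo_suspend_vhca(struct pci_dev *vf_pdev, u16 vhca_id, u16 op_mod)
	{
		u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
		u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
		struct mlx5_core_dev *mdev;
		int err;

		mdev = mlx5_vf_get_core_dev(vf_pdev);
		if (!mdev)
			return -ENOTCONN;

		MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
		MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
		MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

		err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));

		mlx5_vf_put_core_dev(mdev);
		return err;
	}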
From e8eb9e32999dc5995b19d5141f8ebb38f69696fc Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Thu, 24 Feb 2022 18:58:55 -0800 Subject: PCI: Add Fungible Vendor ID to pci_ids.h Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Signed-off-by: Dimitris Michailidis Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aad54c666407..c7e6f2043c7d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2561,6 +2561,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_FUNGIBLE 0x1dad + #define PCI_VENDOR_ID_HXT 0x1dbf #define PCI_VENDOR_ID_TEKRAM 0x1de1 -- cgit v1.2.3 From 91495f21fcec3a889d6a9c21e6df16c4c15f2184 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:16 +0200 Subject: net: dsa: tag_8021q: replace the SVL bridging with VLAN-unaware IVL bridging For VLAN-unaware bridging, tag_8021q uses something perhaps a bit too tied with the sja1105 switch: each port uses the same pvid which is also used for standalone operation (a unique one from which the source port and device ID can be retrieved when packets from that port are forwarded to the CPU). Since each port has a unique pvid when performing autonomous forwarding, the switch must be configured for Shared VLAN Learning (SVL) such that the VLAN ID itself is ignored when performing FDB lookups. Without SVL, packets would always be flooded, since FDB lookup in the source port's VLAN would never find any entry. First of all, to make tag_8021q more palatable to switches which might not support Shared VLAN Learning, let's just use a common VLAN for all ports that are under the same bridge. Secondly, using Shared VLAN Learning means that FDB isolation can never be enforced. But if all ports under the same VLAN-unaware bridge share the same VLAN ID, it can. The disadvantage is that the CPU port can no longer perform precise source port identification for these packets. But at least we have a mechanism which has proven to be adequate for that situation: imprecise RX (dsa_find_designated_bridge_port_by_vid), which is what we use for termination on VLAN-aware bridges. The VLAN ID that VLAN-unaware bridges will use with tag_8021q is the same one as we were previously using for imprecise TX (bridge TX forwarding offload). It is already allocated, it is just a matter of using it. Note that because now all ports under the same bridge share the same VLAN, the complexity of performing a tag_8021q bridge join decreases dramatically. We no longer have to install the RX VLAN of a newly joining port into the port membership of the existing bridge ports. The newly joining port just becomes a member of the VLAN corresponding to that bridge, and the other ports are already members of it from when they joined the bridge themselves. So forwarding works properly. This means that we can unhook dsa_tag_8021q_bridge_{join,leave} from the cross-chip notifier level dsa_switch_bridge_{join,leave}. We can put these calls directly into the sja1105 driver. With this new mode of operation, a port controlled by tag_8021q can have two pvids whereas before it could only have one. The pvid for standalone operation is different from the pvid used for VLAN-unaware bridging. This is done, again, so that FDB isolation can be enforced. Let tag_8021q manage this by deleting the standalone pvid when a port joins a bridge, and restoring it when it leaves it. 
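The join path therefore reduces to a VLAN swap, along these lines (a sketch; the helper names follow the ones used elsewhere in this series):

	int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
				      struct dsa_bridge bridge)
	{
		struct dsa_port *dp = dsa_to_port(ds, port);
		u16 standalone_vid = dsa_tag_8021q_standalone_vid(dp);
		u16 bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num);
		int err;

		/* Trade the standalone pvid for the per-bridge VLAN */
		err = dsa_port_tag_8021q_vlan_add(dp, bridge_vid, true);
		if (err)
			return err;

		dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false);

		return 0;
	}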
Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 939a1beaddf7..f47f227baa27 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -32,17 +32,17 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); + +void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); + struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); -int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); - -void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); - u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); -- cgit v1.2.3 From d7f9787a763f35225287aedb9364c972ae128d18 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:17 +0200 Subject: net: dsa: tag_8021q: add support for imprecise RX based on the VBID The sja1105 switch can't populate the PORT field of the tag_8021q header when sending a frame to the CPU with a non-zero VBID. Similar to dsa_find_designated_bridge_port_by_vid() which performs imprecise RX for VLAN-aware bridges, let's introduce a helper in tag_8021q for performing imprecise RX based on the VLAN that it has allocated for a VLAN-unaware bridge. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index f47f227baa27..92f5243b841e 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -41,7 +41,11 @@ void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid); + +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid); u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); -- cgit v1.2.3 From 04b67e18ce5b29785578397f6785f28f512d64aa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:20 +0200 Subject: net: dsa: tag_8021q: merge RX and TX VLANs In the old Shared VLAN Learning mode of operation that tag_8021q previously used for forwarding, we needed to have distinct concepts for an RX and a TX VLAN. An RX VLAN could be installed on all ports that were members of a given bridge, so that autonomous forwarding could still work, while a TX VLAN was dedicated for precise packet steering, so it just contained the CPU port and one egress port. Now that tag_8021q uses Independent VLAN Learning and imprecise RX/TX all over, those lines have been blurred and we no longer have the need to do precise TX towards a port that is in a bridge. As for standalone ports, it is fine to use the same VLAN ID for both RX and TX. 
This patch changes the tag_8021q format by shifting the VLAN range it reserves, and halving it. Previously, our DIR bits were encoding the VLAN direction (RX/TX) and were set to either 1 or 2. This meant that tag_8021q reserved 2K VLANs, or 50% of the available range. Change the DIR bits to a hardcoded value of 3 now, which makes tag_8021q reserve only 1K VLANs, and a different range now (the last 1K). This is done so that we leave the old format in place in case we need to return to it. In terms of code, the vid_is_dsa_8021q_rxvlan and vid_is_dsa_8021q_txvlan functions go away. Any vid_is_dsa_8021q is both a TX and an RX VLAN, and they are no longer distinct. For example, felix which did different things for different VLAN types, now needs to handle the RX and the TX logic for the same VLAN. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 92f5243b841e..b4e2862633f6 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -49,18 +49,12 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); -u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); - -u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp); +u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -bool vid_is_dsa_8021q_rxvlan(u16 vid); - -bool vid_is_dsa_8021q_txvlan(u16 vid); - bool vid_is_dsa_8021q(u16 vid); #endif /* _NET_DSA_8021Q_H */ -- cgit v1.2.3 From b6362bdf750b4ba266d4a10156174ec52460e73d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:21 +0200 Subject: net: dsa: tag_8021q: rename dsa_8021q_bridge_tx_fwd_offload_vid The dsa_8021q_bridge_tx_fwd_offload_vid is no longer used just for bridge TX forwarding offload, it is the private VLAN reserved for VLAN-unaware bridging in a way that is compatible with FDB isolation. So just rename it dsa_tag_8021q_bridge_vid. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index b4e2862633f6..3ed117e299ec 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -47,7 +47,7 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, int vbid); -u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); +u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); -- cgit v1.2.3 From c26933639b5402c174c65c01d33f145622784012 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:22 +0200 Subject: net: dsa: request drivers to perform FDB isolation For DSA, to encourage drivers to perform FDB isolation simply means to track which bridge does each FDB and MDB entry belong to. It then becomes the driver responsibility to use something that makes the FDB entry from one bridge not match the FDB lookup of ports from other bridges. 
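A driver that opts in might key its hardware FDB on the bridge number carried by the new argument, for instance (made-up hardware helpers; the struct dsa_db argument itself is described further below):

	static int foo_port_fdb_add(struct dsa_switch *ds, int port,
				    const unsigned char *addr, u16 vid,
				    struct dsa_db db)
	{
		struct foo_priv *priv = ds->priv;
		u16 fid;

		if (db.type != DSA_DB_BRIDGE)
			return -EOPNOTSUPP;

		/* One filtering ID per bridge keeps lookups isolated */
		fid = FOO_FID_BASE + db.bridge.num;

		return foo_hw_fdb_write(priv, port, fid, addr, vid);
	}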
The top-level functions where the bridge is determined are: - dsa_port_fdb_{add,del} - dsa_port_host_fdb_{add,del} - dsa_port_mdb_{add,del} - dsa_port_host_mdb_{add,del} aka the pre-crosschip-notifier functions. Changing the API to pass a reference to a bridge is not superfluous, and looking at the passed bridge argument is not the same as having the driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add() method. DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well, and those do not have any dp->bridge information to retrieve, because they are not in any bridge - they are merely the pipes that serve the user ports that are in one or multiple bridges. The struct dsa_bridge associated with each FDB/MDB entry is encapsulated in a larger "struct dsa_db" database. Although only databases associated to bridges are notified for now, this API will be the starting point for implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB entries on the CPU port which belong to the corresponding user port's port database. These are supposed to match only when the port is standalone. It is better to introduce the API in its expected final form than to introduce it for bridges first, then to have to change drivers which may have made one or more assumptions. Drivers can use the provided bridge.num, but they can also use a different numbering scheme that is more convenient. DSA must perform refcounting on the CPU and DSA ports by also taking into account the bridge number. So if two bridges request the same local address, DSA must notify the driver twice, once for each bridge. In fact, if the driver supports FDB isolation, DSA must perform refcounting per bridge, but if the driver doesn't, DSA must refcount host addresses across all bridges, otherwise it would be telling the driver to delete an FDB entry for a bridge and the driver would delete it for all bridges. So introduce a bool fdb_isolation in drivers which would make all bridge databases passed to the cross-chip notifier have the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal() say that all bridge databases are the same database - which is essentially the legacy behavior. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 01faba89c987..87c5f18eb381 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -341,11 +341,28 @@ struct dsa_link { struct list_head list; }; +enum dsa_db_type { + DSA_DB_PORT, + DSA_DB_LAG, + DSA_DB_BRIDGE, +}; + +struct dsa_db { + enum dsa_db_type type; + + union { + const struct dsa_port *dp; + struct dsa_lag lag; + struct dsa_bridge bridge; + }; +}; + struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; u16 vid; refcount_t refcount; struct list_head list; + struct dsa_db db; }; struct dsa_vlan { @@ -409,6 +426,13 @@ struct dsa_switch { */ u32 mtu_enforcement_ingress:1; + /* Drivers that isolate the FDBs of multiple bridges must set this + * to true to receive the bridge as an argument in .port_fdb_{add,del} + * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be + * passed as zero. 
+ */ + u32 fdb_isolation:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -941,23 +965,29 @@ struct dsa_switch_ops { * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); /* * RXNFC */ -- cgit v1.2.3 From 06b9cce42634a50f2840777a66553b02320db5ef Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:23 +0200 Subject: net: dsa: pass extack to .port_bridge_join driver methods As FDB isolation cannot be enforced between VLAN-aware bridges in lack of hardware assistance like extra FID bits, it seems plausible that many DSA switches cannot do it. Therefore, they need to reject configurations with multiple VLAN-aware bridges from the two code paths that can transition towards that state: - joining a VLAN-aware bridge - toggling VLAN awareness on an existing bridge The .port_vlan_filtering method already propagates the netlink extack to the driver, let's propagate it from .port_bridge_join too, to make sure that the driver can use the same function for both. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 87c5f18eb381..cfedcfb86350 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -937,7 +937,8 @@ struct dsa_switch_ops { int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct dsa_bridge bridge, - bool *tx_fwd_offload); + bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, @@ -1021,7 +1022,8 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, + struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge); -- cgit v1.2.3 From 54c319846086e57071fd0e92d20f2cba0fbf0e79 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Feb 2022 11:22:25 +0200 Subject: net: mscc: ocelot: enforce FDB isolation when VLAN-unaware Currently ocelot uses a pvid of 0 for standalone ports and ports under a VLAN-unaware bridge, and the pvid of the bridge for ports under a VLAN-aware bridge. 
Standalone ports do not perform learning, but packets received on them are still subject to FDB lookups. So if the MAC DA that a standalone port receives has been also learned on a VLAN-unaware bridge port, ocelot will attempt to forward to that port, even though it can't, so it will drop packets. So there is a desire to avoid that, and isolate the FDBs of different bridges from one another, and from standalone ports. The ocelot switch library has two distinct entry points: the felix DSA driver and the ocelot switchdev driver. We need to code up a minimal bridge_num allocation in the ocelot switchdev driver too, this is copied from DSA with the exception that ocelot does not care about DSA trees, cross-chip bridging etc. So it only looks at its own ports that are already in the same bridge. The ocelot switchdev driver uses the bridge_num it has allocated itself, while the felix driver uses the bridge_num allocated by DSA. They are both stored inside ocelot_port->bridge_num by the common function ocelot_port_bridge_join() which receives the bridge_num passed by value. Once we have a bridge_num, we can only use it to enforce isolation between VLAN-unaware bridges. As far as I can see, ocelot does not have anything like a FID that further makes VLAN 100 from a port be different to VLAN 100 from another port with regard to FDB lookup. So we simply deny multiple VLAN-aware bridges. For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we allocate a VLAN for each bridge_num. This will be used as the pvid of each port that is under that VLAN-unaware bridge, for as long as that bridge is VLAN-unaware. VID 0 remains only for standalone ports. It is okay if all standalone ports use the same VID 0, since they perform no address learning, the FDB will contain no entry in VLAN 0, so the packets will always be flooded to the only possible destination, the CPU port. The CPU port module doesn't need to be member of the VLANs to receive packets, but if we use the DSA tag_8021q protocol, those packets are part of the data plane as far as ocelot is concerned, so there it needs to. Just ensure that the DSA tag_8021q CPU port is a member of all reserved VLANs when it is created, and is removed when it is deleted. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index dd4fc34d2992..ee3c59639d70 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -668,6 +668,7 @@ struct ocelot_port { u16 mrp_ring_id; struct net_device *bridge; + int bridge_num; u8 stp_state; int speed; @@ -713,6 +714,8 @@ struct ocelot { enum ocelot_tag_prefix npi_inj_prefix; enum ocelot_tag_prefix npi_xtr_prefix; + unsigned long bridges; + struct list_head multicast; struct list_head pgids; @@ -846,6 +849,9 @@ void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); void ocelot_deinit_port(struct ocelot *ocelot, int port); +void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port); +void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port); + /* DSA callbacks */ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data); void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data); @@ -863,21 +869,24 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); -void ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge); +int ocelot_port_bridge_join(struct ocelot *ocelot, int port, + struct net_device *bridge, int bridge_num, + struct netlink_ext_ack *extack); void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge); int ocelot_mact_flush(struct ocelot *ocelot, int port); int ocelot_fdb_dump(struct ocelot *ocelot, int port, dsa_fdb_dump_cb_t *cb, void *data); -int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid); -int ocelot_fdb_del(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid); +int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge); +int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge); int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + const struct net_device *bridge); int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + const struct net_device *bridge); int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged, struct netlink_ext_ack *extack); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, @@ -901,9 +910,11 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, int ocelot_cls_flower_stats(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); int ocelot_port_mdb_add(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge); int ocelot_port_mdb_del(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge); int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info); -- cgit v1.2.3 From 
bc437f7515f5e14aec9f2801412d9ea48116a97d Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Sat, 12 Feb 2022 21:57:30 -0500 Subject: iio: afe: rescale: expose scale processing function In preparation for the addition of kunit tests, expose the logic responsible for combining channel scales. Signed-off-by: Liam Beguin Reviewed-by: Peter Rosin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220213025739.2561834-2-liambeguin@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/afe/rescale.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/iio/afe/rescale.h (limited to 'include') diff --git a/include/linux/iio/afe/rescale.h b/include/linux/iio/afe/rescale.h new file mode 100644 index 000000000000..8a2eb34af327 --- /dev/null +++ b/include/linux/iio/afe/rescale.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2018 Axentia Technologies AB + */ + +#ifndef __IIO_RESCALE_H__ +#define __IIO_RESCALE_H__ + +#include +#include + +struct device; +struct rescale; + +struct rescale_cfg { + enum iio_chan_type type; + int (*props)(struct device *dev, struct rescale *rescale); +}; + +struct rescale { + const struct rescale_cfg *cfg; + struct iio_channel *source; + struct iio_chan_spec chan; + struct iio_chan_spec_ext_info *ext_info; + bool chan_processed; + s32 numerator; + s32 denominator; +}; + +int rescale_process_scale(struct rescale *rescale, int scale_type, + int *val, int *val2); +#endif /* __IIO_RESCALE_H__ */ -- cgit v1.2.3 From a29c3283653b80b916c5ca5292c5d36415e38e92 Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Sat, 12 Feb 2022 21:57:32 -0500 Subject: iio: afe: rescale: add offset support This is a preparatory change required for the addition of temperature sensing front ends. Signed-off-by: Liam Beguin Reviewed-by: Peter Rosin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220213025739.2561834-4-liambeguin@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/afe/rescale.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iio/afe/rescale.h b/include/linux/iio/afe/rescale.h index 8a2eb34af327..6eecb435488f 100644 --- a/include/linux/iio/afe/rescale.h +++ b/include/linux/iio/afe/rescale.h @@ -25,8 +25,12 @@ struct rescale { bool chan_processed; s32 numerator; s32 denominator; + s32 offset; }; int rescale_process_scale(struct rescale *rescale, int scale_type, int *val, int *val2); +int rescale_process_offset(struct rescale *rescale, int scale_type, + int scale, int scale2, int schan_off, + int *val, int *val2); #endif /* __IIO_RESCALE_H__ */ -- cgit v1.2.3 From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 May 2021 21:15:15 -0700 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). 
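For reference, a minimal sketch of the struct_group() pattern this relies on (hypothetical struct and member names; the UAPI header below uses the underlying __struct_group() form, which behaves the same way):

/* Sketch only: assumes <linux/stddef.h> for struct_group(),
 * <linux/string.h> for memcpy() and <linux/types.h> for the fixed-width types.
 */
struct example_stat_data {
	u64 ts;
	/*
	 * Group the members that are copied as one region. They remain
	 * addressable by name (d->buf_size and so on), while &d->frame and
	 * sizeof(d->frame) describe the whole region, which lets
	 * FORTIFY_SOURCE bounds-check the memcpy() below.
	 */
	struct_group(frame,
		u32 buf_size;
		u16 frame_number;
		u16 config_counter;
	);
};

static void example_copy_frame(struct example_stat_data *to,
			       const struct example_stat_data *from)
{
	/* One bounded copy over the grouped region, not across neighboring fields. */
	memcpy(&to->frame, &from->frame, sizeof(to->frame));
}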
FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Arnd Bergmann Cc: Sakari Ailus Cc: linux-media@vger.kernel.org Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Signed-off-by: Kees Cook --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. @@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From e75d16e58467c5703821e12536c7dc438f3c425d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 24 Feb 2022 07:12:09 +0100 Subject: hwmon: (core) Add support for pwm auto channels attribute pwm[1-*]_auto_channels_temp is documented as an official hwmon sysfs attribute, yet there is no support for it in the new with_info-API. Fix that. 
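As a rough illustration (hypothetical driver, not part of this patch), a with_info driver would advertise the new attribute like this:

static const struct hwmon_channel_info *example_chip_info[] = {
	/* One PWM channel supporting input, enable and auto_channels_temp */
	HWMON_CHANNEL_INFO(pwm,
			   HWMON_PWM_INPUT | HWMON_PWM_ENABLE |
			   HWMON_PWM_AUTO_CHANNELS_TEMP),
	NULL
};

and then report or accept the value from its read/write callbacks when attr is hwmon_pwm_auto_channels_temp.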
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220224061210.16452-2-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index fad1f1df26df..eba380b76d15 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -332,12 +332,14 @@ enum hwmon_pwm_attributes { hwmon_pwm_enable, hwmon_pwm_mode, hwmon_pwm_freq, + hwmon_pwm_auto_channels_temp, }; #define HWMON_PWM_INPUT BIT(hwmon_pwm_input) #define HWMON_PWM_ENABLE BIT(hwmon_pwm_enable) #define HWMON_PWM_MODE BIT(hwmon_pwm_mode) #define HWMON_PWM_FREQ BIT(hwmon_pwm_freq) +#define HWMON_PWM_AUTO_CHANNELS_TEMP BIT(hwmon_pwm_auto_channels_temp) enum hwmon_intrusion_attributes { hwmon_intrusion_alarm, -- cgit v1.2.3 From c5b483d5c1a26b6006180f5dc7b2f8674f19afa3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Feb 2022 16:39:03 -0600 Subject: scsi: libfc: Replace one-element arrays with flexible-array members Use flexible-array members in struct fc_fdmi_attr_entry and fs_fdmi_attrs instead of one-element arrays, and refactor the code accordingly. Also, this helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). https://github.com/KSPP/linux/issues/79 https://github.com/ClangBuiltLinux/linux/issues/1590 Link: https://lore.kernel.org/r/20220214223903.GA859464@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen --- include/scsi/fc/fc_ms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/fc/fc_ms.h b/include/scsi/fc/fc_ms.h index 00191695233a..56a5d2b5a624 100644 --- a/include/scsi/fc/fc_ms.h +++ b/include/scsi/fc/fc_ms.h @@ -158,7 +158,7 @@ struct fc_fdmi_port_name { struct fc_fdmi_attr_entry { __be16 type; __be16 len; - __u8 value[1]; + __u8 value[]; } __attribute__((__packed__)); /* @@ -166,7 +166,7 @@ struct fc_fdmi_attr_entry { */ struct fs_fdmi_attrs { __be32 numattrs; - struct fc_fdmi_attr_entry attr[1]; + struct fc_fdmi_attr_entry attr[]; } __attribute__((__packed__)); /* -- cgit v1.2.3 From f1834fd1635be4b530a7e070c04a6158b8f78c0e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 25 Feb 2022 18:57:35 +0800 Subject: scsi: libsas: Make sas_notify_{phy,port}_event() return void Nobody checks the return codes, so make them return void. Indeed, if the LLDD cannot send an event, nothing much can be done in the LLDD about it. Also remove prototype for sas_notify_phy_event() in sas_internal.h, which should not be there. Link: https://lore.kernel.org/r/1645786656-221630-2-git-send-email-john.garry@huawei.com Reviewed-by: Xiang Chen Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index dc529cc92d65..df2c8fc43429 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -727,9 +727,9 @@ int sas_lu_reset(struct domain_device *dev, u8 *lun); int sas_query_task(struct sas_task *task, u16 tag); int sas_abort_task(struct sas_task *task, u16 tag); -int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, - gfp_t gfp_flags); -int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event, - gfp_t gfp_flags); +void sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event, + gfp_t gfp_flags); +void sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event, + gfp_t gfp_flags); #endif /* _SASLIB_H_ */ -- cgit v1.2.3 From d72ce7d324786257410bec6b36e3756647dd76fd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 Feb 2022 00:27:55 +0100 Subject: power: supply: ab8500: Standardize maintenance charging Maintenance charging is the phase of keeping up the charge after the battery has charged fully using CC/CV charging. This can be done in many successive phases and is usually done with a slightly lower constant voltage than CV, and a slightly lower allowed current. Add an array of maintenance charging points each with a current, voltage and safety timer, and add helper functions to use these. Migrate the AB8500 code over. This is used in several Samsung products using the AB8500 and these batteries and their complete parameters will be added later as full examples, but the default battery in the AB8500 code serves as a reasonable example so far. Reviewed-by: Matti Vaittinen Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 64 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c135196aa9d1..8ced6550caa7 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -349,6 +349,52 @@ struct power_supply_resistance_temp_table { int resistance; /* internal resistance percent */ }; +/** + * struct power_supply_maintenance_charge_table - setting for maintenace charging + * @charge_current_max_ua: maintenance charging current that is used to keep + * the charge of the battery full as current is consumed after full charging. + * The corresponding charge_voltage_max_uv is used as a safeguard: when we + * reach this voltage the maintenance charging current is turned off. It is + * turned back on if we fall below this voltage. + * @charge_voltage_max_uv: maintenance charging voltage that is usually a bit + * lower than the constant_charge_voltage_max_uv. We can apply this settings + * charge_current_max_ua until we get back up to this voltage. + * @safety_timer_minutes: maintenance charging safety timer, with an expiry + * time in minutes. We will only use maintenance charging in this setting + * for a certain amount of time, then we will first move to the next + * maintenance charge current and voltage pair in respective array and wait + * for the next safety timer timeout, or, if we reached the last maintencance + * charging setting, disable charging until we reach + * charge_restart_voltage_uv and restart ordinary CC/CV charging from there. + * These timers should be chosen to align with the typical discharge curve + * for the battery. 
+ * + * When the main CC/CV charging is complete the battery can optionally be + * maintenance charged at the voltages from this table: a table of settings is + * traversed using a slightly lower current and voltage than what is used for + * CC/CV charging. The maintenance charging will for safety reasons not go on + * indefinately: we lower the current and voltage with successive maintenance + * settings, then disable charging completely after we reach the last one, + * and after that we do not restart charging until we reach + * charge_restart_voltage_uv (see struct power_supply_battery_info) and restart + * ordinary CC/CV charging from there. + * + * As an example, a Samsung EB425161LA Lithium-Ion battery is CC/CV charged + * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for + * 60 hours, then maintenance charged at 600mA and 4100mV for 200 hours. + * After this the charge cycle is restarted waiting for + * charge_restart_voltage_uv. + * + * For most mobile electronics this type of maintenance charging is enough for + * the user to disconnect the device and make use of it before both maintenance + * charging cycles are complete. + */ +struct power_supply_maintenance_charge_table { + int charge_current_max_ua; + int charge_voltage_max_uv; + int charge_safety_timer_minutes; +}; + #define POWER_SUPPLY_OCV_TEMP_MAX 20 /** @@ -394,6 +440,10 @@ struct power_supply_resistance_temp_table { * @constant_charge_voltage_max_uv: voltage in microvolts signifying the end of * the CC (constant current) charging phase and the beginning of the CV * (constant voltage) charging phase. + * @maintenance_charge: an array of maintenance charging settings to be used + * after the main CC/CV charging phase is complete. + * @maintenance_charge_size: the number of maintenance charging settings in + * maintenance_charge. * @factory_internal_resistance_uohm: the internal resistance of the battery * at fabrication time, expressed in microohms. 
This resistance will vary * depending on the lifetime and charge of the battery, so this is just a @@ -543,6 +593,8 @@ struct power_supply_battery_info { int overvoltage_limit_uv; int constant_charge_current_max_ua; int constant_charge_voltage_max_uv; + struct power_supply_maintenance_charge_table *maintenance_charge; + int maintenance_charge_size; int factory_internal_resistance_uohm; int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX]; int temp_ambient_alert_min; @@ -596,6 +648,8 @@ extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, extern int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, int table_len, int temp); +extern struct power_supply_maintenance_charge_table * +power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); int power_supply_get_property_from_supplier(struct power_supply *psy, @@ -603,6 +657,16 @@ int power_supply_get_property_from_supplier(struct power_supply *psy, union power_supply_propval *val); extern int power_supply_set_battery_charged(struct power_supply *psy); +static inline bool +power_supply_supports_maintenance_charging(struct power_supply_battery_info *info) +{ + struct power_supply_maintenance_charge_table *mt; + + mt = power_supply_get_maintenance_charging_setting(info, 0); + + return (mt != NULL); +} + #ifdef CONFIG_POWER_SUPPLY extern int power_supply_is_system_supplied(void); #else -- cgit v1.2.3 From 0e8b903b522b5a3cb473035cea085d396dd7150a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 Feb 2022 00:27:56 +0100 Subject: power: supply: ab8500: Standardize alert mode charging The AB8500 code is using a special current and voltage setting when the battery is in "alert mode", i.e. when it is starting to go outside normal operating conditions so it is too cold or too hot. This makes sense as a way for the charging algorithm to deal with hostile environments. Add the needed members to the struct power_supply_battery_info, and switch the AB8500 charging code over to using this. Reviewed-by: Matti Vaittineen Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 8ced6550caa7..f8601598d3d3 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -444,6 +444,19 @@ struct power_supply_maintenance_charge_table { * after the main CC/CV charging phase is complete. * @maintenance_charge_size: the number of maintenance charging settings in * maintenance_charge. + * @alert_low_temp_charge_current_ua: The charging current to use if the battery + * enters low alert temperature, i.e. if the internal temperature is between + * temp_alert_min and temp_min. No matter the charging phase, this + * and alert_high_temp_charge_voltage_uv will be applied. + * @alert_low_temp_charge_voltage_uv: Same as alert_low_temp_charge_current_ua, + * but for the charging voltage. + * @alert_high_temp_charge_current_ua: The charging current to use if the + * battery enters high alert temperature, i.e. if the internal temperature is + * between temp_alert_max and temp_max. No matter the charging phase, this + * and alert_high_temp_charge_voltage_uv will be applied, usually lowering + * the charging current as an evasive manouver. 
+ * @alert_high_temp_charge_voltage_uv: Same as + * alert_high_temp_charge_current_ua, but for the charging voltage. * @factory_internal_resistance_uohm: the internal resistance of the battery * at fabrication time, expressed in microohms. This resistance will vary * depending on the lifetime and charge of the battery, so this is just a @@ -595,6 +608,10 @@ struct power_supply_battery_info { int constant_charge_voltage_max_uv; struct power_supply_maintenance_charge_table *maintenance_charge; int maintenance_charge_size; + int alert_low_temp_charge_current_ua; + int alert_low_temp_charge_voltage_uv; + int alert_high_temp_charge_current_ua; + int alert_high_temp_charge_voltage_uv; int factory_internal_resistance_uohm; int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX]; int temp_ambient_alert_min; -- cgit v1.2.3 From 1f918e0fe43ec41d906e2cf96b80b15451fed7ba Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 Feb 2022 00:27:57 +0100 Subject: power: supply: ab8500: Standardize BTI resistance The Battery Type Indicator (BTI) resistor is a resistor mounted between a special terminal on the battery and ground. By sending a fixed current (such as 7mA) through this resistor and measuring the voltage over it, the resistance can be determined, and this verifies the battery type. Typical side view of the battery: o o o GND BTI +3.8V Typical example of the electrical layout: +3.8 V BTI | | | + | _______ [ ] 7kOhm ___ | | | | | GND GND By verifying this resistance before attempting to charge the battery we add an additional level of security. In some systems this is used for plug-and-play of batteries with different capacity. In other cases, this is merely used to verify that the right type of battery is connected, if several batteries have the same physical shape and can be plugged into the same slot. Sometimes this is just a surplus security mechanism. Nokia and Samsung among many other vendors are known to use these BTI resistors. Add the BTI properties to struct power_supply_battery_info and switch the AB8500 charger code over to using it. Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f8601598d3d3..7fdc03cf2285 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -498,6 +498,14 @@ struct power_supply_maintenance_charge_table { * by temperature: highest temperature with lowest resistance first, lowest * temperature with highest resistance last. * @resist_table_size: the number of items in the resist_table. + * @bti_resistance_ohm: The Battery Type Indicator (BIT) nominal resistance + * in ohms for this battery, if an identification resistor is mounted + * between a third battery terminal and ground. This scheme is used by a lot + * of mobile device batteries. + * @bti_resistance_tolerance: The tolerance in percent of the BTI resistance, + * for example 10 for +/- 10%, if the bti_resistance is set to 7000 and the + * tolerance is 10% we will detect a proper battery if the BTI resistance + * is between 6300 and 7700 Ohm. * * This is the recommended struct to manage static battery parameters, * populated by power_supply_get_battery_info(). 
Most platform drivers should @@ -624,6 +632,8 @@ struct power_supply_battery_info { int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; int resist_table_size; + int bti_resistance_ohm; + int bti_resistance_tolerance; }; extern struct atomic_notifier_head power_supply_notifier; @@ -667,6 +677,8 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table int table_len, int temp); extern struct power_supply_maintenance_charge_table * power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index); +extern bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info, + int resistance); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); int power_supply_get_property_from_supplier(struct power_supply *psy, @@ -684,6 +696,7 @@ power_supply_supports_maintenance_charging(struct power_supply_battery_info *inf return (mt != NULL); } + #ifdef CONFIG_POWER_SUPPLY extern int power_supply_is_system_supplied(void); #else -- cgit v1.2.3 From e9e7d165b4b0413c5b7db74515e4981a226b78a0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 Feb 2022 00:27:58 +0100 Subject: power: supply: Support VBAT-to-Ri lookup tables In Samsung devices, the method used to compensate for temperature, age, load etc is by way of VBAT to Ri tables, which correlates the battery voltage under load (VBAT) to an internal resistance (Ri). Using this Ri and a measurement of the current out of the battery (IBAT) the open circuit voltage (OCV) can be calculated as: OCV = VBAT - (Ri * IBAT) The details are described in comments to struct power_supply_battery_info in the commit. Since not all batteries supply this VBAT-to-Ri data, the fallback method to use the temperature-to-Ri lookup table can also be used as a fallback. Add two helper functions to check if we have the tables needed for using power_supply_vbat2ri() or power_supply_temp2resist_simple() respectively, so capacity estimation code can choose which one to employ. Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 113 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7fdc03cf2285..cb380c1d9459 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -349,6 +349,11 @@ struct power_supply_resistance_temp_table { int resistance; /* internal resistance percent */ }; +struct power_supply_vbat_ri_table { + int vbat_uv; /* Battery voltage in microvolt */ + int ri_uohm; /* Internal resistance in microohm */ +}; + /** * struct power_supply_maintenance_charge_table - setting for maintenace charging * @charge_current_max_ua: maintenance charging current that is used to keep @@ -460,7 +465,14 @@ struct power_supply_maintenance_charge_table { * @factory_internal_resistance_uohm: the internal resistance of the battery * at fabrication time, expressed in microohms. This resistance will vary * depending on the lifetime and charge of the battery, so this is just a - * nominal ballpark figure. + * nominal ballpark figure. This internal resistance is given for the state + * when the battery is discharging. + * @factory_internal_resistance_charging_uohm: the internal resistance of the + * battery at fabrication time while charging, expressed in microohms. 
+ * The charging process will affect the internal resistance of the battery + * so this value provides a better resistance under these circumstances. + * This resistance will vary depending on the lifetime and charge of the + * battery, so this is just a nominal ballpark figure. * @ocv_temp: array indicating the open circuit voltage (OCV) capacity * temperature indices. This is an array of temperatures in degrees Celsius * indicating which capacity table to use for a certain temperature, since @@ -498,6 +510,21 @@ struct power_supply_maintenance_charge_table { * by temperature: highest temperature with lowest resistance first, lowest * temperature with highest resistance last. * @resist_table_size: the number of items in the resist_table. + * @vbat2ri_discharging: this is a table that correlates Battery voltage (VBAT) + * to internal resistance (Ri). The resistance is given in microohm for the + * corresponding voltage in microvolts. The internal resistance is used to + * determine the open circuit voltage so that we can determine the capacity + * of the battery. These voltages to resistance tables apply when the battery + * is discharging. The table must be ordered descending by voltage: highest + * voltage first. + * @vbat2ri_discharging_size: the number of items in the vbat2ri_discharging + * table. + * @vbat2ri_charging: same function as vbat2ri_discharging but for the state + * when the battery is charging. Being under charge changes the battery's + * internal resistance characteristics so a separate table is needed.* + * The table must be ordered descending by voltage: highest voltage first. + * @vbat2ri_charging_size: the number of items in the vbat2ri_charging + * table. * @bti_resistance_ohm: The Battery Type Indicator (BIT) nominal resistance * in ohms for this battery, if an identification resistor is mounted * between a third battery terminal and ground. This scheme is used by a lot @@ -512,7 +539,9 @@ struct power_supply_maintenance_charge_table { * use these for consistency. * * Its field names must correspond to elements in enum power_supply_property. - * The default field value is -EINVAL. + * The default field value is -EINVAL or NULL for pointers. + * + * CC/CV CHARGING: * * The charging parameters here assume a CC/CV charging scheme. This method * is most common with Lithium Ion batteries (other methods are possible) and @@ -597,6 +626,66 @@ struct power_supply_maintenance_charge_table { * Overcharging Lithium Ion cells can be DANGEROUS and lead to fire or * explosions. * + * DETERMINING BATTERY CAPACITY: + * + * Several members of the struct deal with trying to determine the remaining + * capacity in the battery, usually as a percentage of charge. In practice + * many chargers uses a so-called fuel gauge or coloumb counter that measure + * how much charge goes into the battery and how much goes out (+/- leak + * consumption). This does not help if we do not know how much capacity the + * battery has to begin with, such as when it is first used or was taken out + * and charged in a separate charger. Therefore many capacity algorithms use + * the open circuit voltage with a look-up table to determine the rough + * capacity of the battery. The open circuit voltage can be conceptualized + * with an ideal voltage source (V) in series with an internal resistance (Ri) + * like this: + * + * +-------> IBAT >----------------+ + * | ^ | + * [ ] Ri | | + * | | VBAT | + * o <---------- | | + * +| ^ | [ ] Rload + * .---. 
| | | + * | V | | OCV | | + * '---' | | | + * | | | | + * GND +-------------------------------+ + * + * If we disconnect the load (here simplified as a fixed resistance Rload) + * and measure VBAT with a infinite impedance voltage meter we will get + * VBAT = OCV and this assumption is sometimes made even under load, assuming + * Rload is insignificant. However this will be of dubious quality because the + * load is rarely that small and Ri is strongly nonlinear depending on + * temperature and how much capacity is left in the battery due to the + * chemistry involved. + * + * In many practical applications we cannot just disconnect the battery from + * the load, so instead we often try to measure the instantaneous IBAT (the + * current out from the battery), estimate the Ri and thus calculate the + * voltage drop over Ri and compensate like this: + * + * OCV = VBAT - (IBAT * Ri) + * + * The tables vbat2ri_discharging and vbat2ri_charging are used to determine + * (by interpolation) the Ri from the VBAT under load. These curves are highly + * nonlinear and may need many datapoints but can be found in datasheets for + * some batteries. This gives the compensated open circuit voltage (OCV) for + * the battery even under load. Using this method will also compensate for + * temperature changes in the environment: this will also make the internal + * resistance change, and it will affect the VBAT under load, so correlating + * VBAT to Ri takes both remaining capacity and temperature into consideration. + * + * Alternatively a manufacturer can specify how the capacity of the battery + * is dependent on the battery temperature which is the main factor affecting + * Ri. As we know all checmical reactions are faster when it is warm and slower + * when it is cold. You can put in 1500mAh and only get 800mAh out before the + * voltage drops too low for example. This effect is also highly nonlinear and + * the purpose of the table resist_table: this will take a temperature and + * tell us how big percentage of Ri the specified temperature correlates to. + * Usually we have 100% of the factory_internal_resistance_uohm at 25 degrees + * Celsius. + * * The power supply class itself doesn't use this struct as of now. 
*/ @@ -621,6 +710,7 @@ struct power_supply_battery_info { int alert_high_temp_charge_current_ua; int alert_high_temp_charge_voltage_uv; int factory_internal_resistance_uohm; + int factory_internal_resistance_charging_uohm; int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX]; int temp_ambient_alert_min; int temp_ambient_alert_max; @@ -632,6 +722,10 @@ struct power_supply_battery_info { int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; int resist_table_size; + struct power_supply_vbat_ri_table *vbat2ri_discharging; + int vbat2ri_discharging_size; + struct power_supply_vbat_ri_table *vbat2ri_charging; + int vbat2ri_charging_size; int bti_resistance_ohm; int bti_resistance_tolerance; }; @@ -675,6 +769,8 @@ extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, extern int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, int table_len, int temp); +extern int power_supply_vbat2ri(struct power_supply_battery_info *info, + int vbat_uv, bool charging); extern struct power_supply_maintenance_charge_table * power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index); extern bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info, @@ -696,6 +792,19 @@ power_supply_supports_maintenance_charging(struct power_supply_battery_info *inf return (mt != NULL); } +static inline bool +power_supply_supports_vbat2ri(struct power_supply_battery_info *info) +{ + return ((info->vbat2ri_discharging != NULL) && + info->vbat2ri_discharging_size > 0); +} + +static inline bool +power_supply_supports_temp2ri(struct power_supply_battery_info *info) +{ + return ((info->resist_table != NULL) && + info->resist_table_size > 0); +} #ifdef CONFIG_POWER_SUPPLY extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From 73c022e129630fb16e5eb64d98ce5f14d9b74efd Mon Sep 17 00:00:00 2001 From: Chun-Jie Chen Date: Sun, 30 Jan 2022 09:21:00 +0800 Subject: dt-bindings: power: Add MT8195 power domains Add power domains dt-bindings for MT8195. Signed-off-by: Chun-Jie Chen Acked-by: Rob Herring Reviewed-by: Enric Balletbo i Serra Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220130012104.5292-2-chun-jie.chen@mediatek.com Signed-off-by: Matthias Brugger --- include/dt-bindings/power/mt8195-power.h | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/dt-bindings/power/mt8195-power.h (limited to 'include') diff --git a/include/dt-bindings/power/mt8195-power.h b/include/dt-bindings/power/mt8195-power.h new file mode 100644 index 000000000000..b20ca4b3e3a8 --- /dev/null +++ b/include/dt-bindings/power/mt8195-power.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Copyright (c) 2021 MediaTek Inc. 
+ * Author: Chun-Jie Chen + */ + +#ifndef _DT_BINDINGS_POWER_MT8195_POWER_H +#define _DT_BINDINGS_POWER_MT8195_POWER_H + +#define MT8195_POWER_DOMAIN_PCIE_MAC_P0 0 +#define MT8195_POWER_DOMAIN_PCIE_MAC_P1 1 +#define MT8195_POWER_DOMAIN_PCIE_PHY 2 +#define MT8195_POWER_DOMAIN_SSUSB_PCIE_PHY 3 +#define MT8195_POWER_DOMAIN_CSI_RX_TOP 4 +#define MT8195_POWER_DOMAIN_ETHER 5 +#define MT8195_POWER_DOMAIN_ADSP 6 +#define MT8195_POWER_DOMAIN_AUDIO 7 +#define MT8195_POWER_DOMAIN_MFG0 8 +#define MT8195_POWER_DOMAIN_MFG1 9 +#define MT8195_POWER_DOMAIN_MFG2 10 +#define MT8195_POWER_DOMAIN_MFG3 11 +#define MT8195_POWER_DOMAIN_MFG4 12 +#define MT8195_POWER_DOMAIN_MFG5 13 +#define MT8195_POWER_DOMAIN_MFG6 14 +#define MT8195_POWER_DOMAIN_VPPSYS0 15 +#define MT8195_POWER_DOMAIN_VDOSYS0 16 +#define MT8195_POWER_DOMAIN_VPPSYS1 17 +#define MT8195_POWER_DOMAIN_VDOSYS1 18 +#define MT8195_POWER_DOMAIN_DP_TX 19 +#define MT8195_POWER_DOMAIN_EPD_TX 20 +#define MT8195_POWER_DOMAIN_HDMI_TX 21 +#define MT8195_POWER_DOMAIN_WPESYS 22 +#define MT8195_POWER_DOMAIN_VDEC0 23 +#define MT8195_POWER_DOMAIN_VDEC1 24 +#define MT8195_POWER_DOMAIN_VDEC2 25 +#define MT8195_POWER_DOMAIN_VENC 26 +#define MT8195_POWER_DOMAIN_VENC_CORE1 27 +#define MT8195_POWER_DOMAIN_IMG 28 +#define MT8195_POWER_DOMAIN_DIP 29 +#define MT8195_POWER_DOMAIN_IPE 30 +#define MT8195_POWER_DOMAIN_CAM 31 +#define MT8195_POWER_DOMAIN_CAM_RAWA 32 +#define MT8195_POWER_DOMAIN_CAM_RAWB 33 +#define MT8195_POWER_DOMAIN_CAM_MRAW 34 + +#endif /* _DT_BINDINGS_POWER_MT8195_POWER_H */ -- cgit v1.2.3 From 342479c86d3e8f9e946a07ff0cafbd36511ae30a Mon Sep 17 00:00:00 2001 From: Chun-Jie Chen Date: Sun, 30 Jan 2022 09:21:04 +0800 Subject: soc: mediatek: pm-domains: Add support for mt8195 Add domain control data including bus protection data size change due to more protection steps in mt8195. 
Signed-off-by: Chun-Jie Chen Reviewed-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220130012104.5292-6-chun-jie.chen@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 82 +++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 4615a228da51..d858e0bab7a2 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,88 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8195_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258 +#define MT8195_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8195_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8195_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8195_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8195_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8195_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8195_TOP_AXI_PROT_EN_MM_STA1 0x2ec +#define MT8195_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8195_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8195_TOP_AXI_PROT_EN_2_STA1 0x724 +#define MT8195_TOP_AXI_PROT_EN_VDNR_SET 0xb84 +#define MT8195_TOP_AXI_PROT_EN_VDNR_CLR 0xb88 +#define MT8195_TOP_AXI_PROT_EN_VDNR_STA1 0xb90 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_SET 0xba4 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_CLR 0xba8 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_STA1 0xbb0 +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_SET 0xbb8 +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_CLR 0xbbc +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_STA1 0xbc4 +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET 0xbcc +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR 0xbd0 +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1 0xbd8 +#define MT8195_TOP_AXI_PROT_EN_MM_2_SET 0xdcc +#define MT8195_TOP_AXI_PROT_EN_MM_2_CLR 0xdd0 +#define MT8195_TOP_AXI_PROT_EN_MM_2_STA1 0xdd8 + +#define MT8195_TOP_AXI_PROT_EN_VDOSYS0 BIT(6) +#define MT8195_TOP_AXI_PROT_EN_VPPSYS0 BIT(10) +#define MT8195_TOP_AXI_PROT_EN_MFG1 BIT(11) +#define MT8195_TOP_AXI_PROT_EN_MFG1_2ND GENMASK(22, 21) +#define MT8195_TOP_AXI_PROT_EN_VPPSYS0_2ND BIT(23) +#define MT8195_TOP_AXI_PROT_EN_1_MFG1 GENMASK(20, 19) +#define MT8195_TOP_AXI_PROT_EN_1_CAM BIT(22) +#define MT8195_TOP_AXI_PROT_EN_2_CAM BIT(0) +#define MT8195_TOP_AXI_PROT_EN_2_MFG1_2ND GENMASK(6, 5) +#define MT8195_TOP_AXI_PROT_EN_2_MFG1 BIT(7) +#define MT8195_TOP_AXI_PROT_EN_2_AUDIO (BIT(9) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_2_ADSP (BIT(12) | GENMASK(16, 14)) +#define MT8195_TOP_AXI_PROT_EN_MM_CAM (BIT(0) | BIT(2) | BIT(4)) +#define MT8195_TOP_AXI_PROT_EN_MM_IPE BIT(1) +#define MT8195_TOP_AXI_PROT_EN_MM_IMG BIT(3) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS0 GENMASK(21, 17) +#define MT8195_TOP_AXI_PROT_EN_MM_VPPSYS1 GENMASK(8, 5) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC (BIT(9) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC_CORE1 (BIT(10) | BIT(12)) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC0 BIT(13) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC1 BIT(14) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS1_2ND BIT(22) +#define MT8195_TOP_AXI_PROT_EN_MM_VPPSYS1_2ND BIT(23) +#define MT8195_TOP_AXI_PROT_EN_MM_CAM_2ND BIT(24) +#define MT8195_TOP_AXI_PROT_EN_MM_IMG_2ND BIT(25) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC_2ND BIT(26) +#define MT8195_TOP_AXI_PROT_EN_MM_WPESYS BIT(27) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC0_2ND BIT(28) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC1_2ND BIT(29) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS1 GENMASK(31, 30) +#define 
MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS0_2ND (GENMASK(1, 0) | BIT(4) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VENC BIT(2) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VENC_CORE1 (BIT(3) | BIT(15)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_CAM (BIT(5) | BIT(17)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS1 (GENMASK(7, 6) | BIT(18)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS0 GENMASK(9, 8) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDOSYS1 BIT(10) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC2_2ND BIT(12) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC0_2ND BIT(13) +#define MT8195_TOP_AXI_PROT_EN_MM_2_WPESYS_2ND BIT(14) +#define MT8195_TOP_AXI_PROT_EN_MM_2_IPE BIT(16) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC2 BIT(21) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC0 BIT(22) +#define MT8195_TOP_AXI_PROT_EN_MM_2_WPESYS GENMASK(24, 23) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_EPD_TX BIT(1) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_DP_TX BIT(2) +#define MT8195_TOP_AXI_PROT_EN_VDNR_PCIE_MAC_P0 (BIT(11) | BIT(28)) +#define MT8195_TOP_AXI_PROT_EN_VDNR_PCIE_MAC_P1 (BIT(12) | BIT(29)) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_PCIE_MAC_P0 BIT(13) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_PCIE_MAC_P1 BIT(14) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1 (BIT(17) | BIT(19)) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VPPSYS0 BIT(20) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VDOSYS0 BIT(21) + #define MT8192_TOP_AXI_PROT_EN_STA1 0x228 #define MT8192_TOP_AXI_PROT_EN_1_STA1 0x258 #define MT8192_TOP_AXI_PROT_EN_SET 0x2a0 -- cgit v1.2.3 From c8a006896fce9410cb7c479b6b52de553b69bfd9 Mon Sep 17 00:00:00 2001 From: Chun-Jie Chen Date: Tue, 15 Feb 2022 18:49:16 +0800 Subject: dt-bindings: power: Add MT8186 power domains Add power domains dt-bindings for MT8186. Signed-off-by: Chun-Jie Chen Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220215104917.5726-2-chun-jie.chen@mediatek.com Signed-off-by: Matthias Brugger --- include/dt-bindings/power/mt8186-power.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/dt-bindings/power/mt8186-power.h (limited to 'include') diff --git a/include/dt-bindings/power/mt8186-power.h b/include/dt-bindings/power/mt8186-power.h new file mode 100644 index 000000000000..429f7197f6b6 --- /dev/null +++ b/include/dt-bindings/power/mt8186-power.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * Copyright (c) 2022 MediaTek Inc. 
+ * Author: Chun-Jie Chen + */ + +#ifndef _DT_BINDINGS_POWER_MT8186_POWER_H +#define _DT_BINDINGS_POWER_MT8186_POWER_H + +#define MT8186_POWER_DOMAIN_MFG0 0 +#define MT8186_POWER_DOMAIN_MFG1 1 +#define MT8186_POWER_DOMAIN_MFG2 2 +#define MT8186_POWER_DOMAIN_MFG3 3 +#define MT8186_POWER_DOMAIN_SSUSB 4 +#define MT8186_POWER_DOMAIN_SSUSB_P1 5 +#define MT8186_POWER_DOMAIN_DIS 6 +#define MT8186_POWER_DOMAIN_IMG 7 +#define MT8186_POWER_DOMAIN_IMG2 8 +#define MT8186_POWER_DOMAIN_IPE 9 +#define MT8186_POWER_DOMAIN_CAM 10 +#define MT8186_POWER_DOMAIN_CAM_RAWA 11 +#define MT8186_POWER_DOMAIN_CAM_RAWB 12 +#define MT8186_POWER_DOMAIN_VENC 13 +#define MT8186_POWER_DOMAIN_VDEC 14 +#define MT8186_POWER_DOMAIN_WPE 15 +#define MT8186_POWER_DOMAIN_CONN_ON 16 +#define MT8186_POWER_DOMAIN_CSIRX_TOP 17 +#define MT8186_POWER_DOMAIN_ADSP_AO 18 +#define MT8186_POWER_DOMAIN_ADSP_INFRA 19 +#define MT8186_POWER_DOMAIN_ADSP_TOP 20 + +#endif /* _DT_BINDINGS_POWER_MT8186_POWER_H */ -- cgit v1.2.3 From 88590cbc17033c86c8591d9f22401325961a8a59 Mon Sep 17 00:00:00 2001 From: Chun-Jie Chen Date: Tue, 15 Feb 2022 18:49:17 +0800 Subject: soc: mediatek: pm-domains: Add support for mt8186 Add power domain control data in mt8186. Signed-off-by: Chun-Jie Chen Link: https://lore.kernel.org/r/20220215104917.5726-3-chun-jie.chen@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index d858e0bab7a2..8a1c2040a28e 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -140,6 +140,54 @@ #define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) #define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) +#define MT8186_TOP_AXI_PROT_EN_SET (0x2A0) +#define MT8186_TOP_AXI_PROT_EN_CLR (0x2A4) +#define MT8186_TOP_AXI_PROT_EN_STA (0x228) +#define MT8186_TOP_AXI_PROT_EN_1_SET (0x2A8) +#define MT8186_TOP_AXI_PROT_EN_1_CLR (0x2AC) +#define MT8186_TOP_AXI_PROT_EN_1_STA (0x258) +#define MT8186_TOP_AXI_PROT_EN_2_SET (0x2B0) +#define MT8186_TOP_AXI_PROT_EN_2_CLR (0x2B4) +#define MT8186_TOP_AXI_PROT_EN_2_STA (0x26C) +#define MT8186_TOP_AXI_PROT_EN_3_SET (0x2B8) +#define MT8186_TOP_AXI_PROT_EN_3_CLR (0x2BC) +#define MT8186_TOP_AXI_PROT_EN_3_STA (0x2C8) + +/* MFG1 */ +#define MT8186_TOP_AXI_PROT_EN_1_MFG1_STEP1 (GENMASK(28, 27)) +#define MT8186_TOP_AXI_PROT_EN_MFG1_STEP2 (GENMASK(22, 21)) +#define MT8186_TOP_AXI_PROT_EN_MFG1_STEP3 (BIT(25)) +#define MT8186_TOP_AXI_PROT_EN_1_MFG1_STEP4 (BIT(29)) +/* DIS */ +#define MT8186_TOP_AXI_PROT_EN_1_DIS_STEP1 (GENMASK(12, 11)) +#define MT8186_TOP_AXI_PROT_EN_DIS_STEP2 (GENMASK(2, 1) | GENMASK(11, 10)) +/* IMG */ +#define MT8186_TOP_AXI_PROT_EN_1_IMG_STEP1 (BIT(23)) +#define MT8186_TOP_AXI_PROT_EN_1_IMG_STEP2 (BIT(15)) +/* IPE */ +#define MT8186_TOP_AXI_PROT_EN_1_IPE_STEP1 (BIT(24)) +#define MT8186_TOP_AXI_PROT_EN_1_IPE_STEP2 (BIT(16)) +/* CAM */ +#define MT8186_TOP_AXI_PROT_EN_1_CAM_STEP1 (GENMASK(22, 21)) +#define MT8186_TOP_AXI_PROT_EN_1_CAM_STEP2 (GENMASK(14, 13)) +/* VENC */ +#define MT8186_TOP_AXI_PROT_EN_1_VENC_STEP1 (BIT(31)) +#define MT8186_TOP_AXI_PROT_EN_1_VENC_STEP2 (BIT(19)) +/* VDEC */ +#define MT8186_TOP_AXI_PROT_EN_1_VDEC_STEP1 (BIT(30)) +#define MT8186_TOP_AXI_PROT_EN_1_VDEC_STEP2 (BIT(17)) +/* WPE */ +#define MT8186_TOP_AXI_PROT_EN_2_WPE_STEP1 (BIT(17)) +#define MT8186_TOP_AXI_PROT_EN_2_WPE_STEP2 (BIT(16)) +/* CONN_ON */ +#define 
MT8186_TOP_AXI_PROT_EN_1_CONN_ON_STEP1 (BIT(18)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP2 (BIT(14)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP3 (BIT(13)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP4 (BIT(16)) +/* ADSP_TOP */ +#define MT8186_TOP_AXI_PROT_EN_3_ADSP_TOP_STEP1 (GENMASK(12, 11)) +#define MT8186_TOP_AXI_PROT_EN_3_ADSP_TOP_STEP2 (GENMASK(1, 0)) + #define MT8183_TOP_AXI_PROT_EN_STA1 0x228 #define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 #define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 -- cgit v1.2.3 From b8cd5831c61cadee4493248babef8386f836f31c Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 24 Feb 2022 10:29:07 +0000 Subject: net: flow_offload: add tc police action parameters The current police offload action entry is missing exceed/notexceed actions and parameters that can be configured by tc police action. Add the missing parameters as a pre-step for offloading police actions to hardware. Signed-off-by: Jianbo Liu Signed-off-by: Roi Dayan Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/flow_offload.h | 9 +++++++++ include/net/tc_act/tc_police.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 5b8c54eb7a6b..74f44d44abe3 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -148,6 +148,8 @@ enum flow_action_id { FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, FLOW_ACTION_PPPOE_PUSH, + FLOW_ACTION_JUMP, + FLOW_ACTION_PIPE, NUM_FLOW_ACTIONS, }; @@ -235,9 +237,16 @@ struct flow_action_entry { struct { /* FLOW_ACTION_POLICE */ u32 burst; u64 rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; u64 burst_pkt; u64 rate_pkt_ps; u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; } police; struct { /* FLOW_ACTION_CT */ int action; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 72649512dcdd..283bde711a42 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -159,4 +159,34 @@ static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) return params->tcfp_mtu; } +static inline u64 tcf_police_peakrate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->peak.rate_bytes_ps; +} + +static inline u32 tcf_police_tcfp_ewma_rate(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->tcfp_ewma_rate; +} + +static inline u16 tcf_police_rate_overhead(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->rate.overhead; +} + #endif /* __NET_TC_POLICE_H */ -- cgit v1.2.3 From d97b4b105ce71f9f907f1511986cc1d224126772 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 24 Feb 2022 10:29:08 +0000 Subject: flow_offload: reject offload for all drivers with invalid police parameters As more police parameters are passed to flow_offload, driver can check them to make sure hardware handles packets in the way indicated by tc. The conform-exceed control should be drop/pipe or drop/ok. 
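A rough sketch of such driver-side validation, using only the fields and the flow_action_is_last_entry() helper added by these patches (the function name is hypothetical, not taken from any specific driver):

static int example_police_validate(const struct flow_action *action,
				   const struct flow_action_entry *act,
				   struct netlink_ext_ack *extack)
{
	/* Exceed action must be drop */
	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack, "Offload not supported when exceed action is not drop");
		return -EOPNOTSUPP;
	}

	/* Conform (notexceed) action must be pipe or ok */
	if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
	    act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
		NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform action is not pipe or ok");
		return -EOPNOTSUPP;
	}

	/* For conform-exceed drop/ok, police must be the last action */
	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
	    !flow_action_is_last_entry(action, act)) {
		NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform action is ok, but police is not the last action");
		return -EOPNOTSUPP;
	}

	/* Parameters the hardware cannot be configured with */
	if (act->police.peakrate_bytes_ps || act->police.avrate ||
	    act->police.overhead) {
		NL_SET_ERR_MSG_MOD(extack, "Offload not supported when peakrate/avrate/overhead is configured");
		return -EOPNOTSUPP;
	}

	return 0;
}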
Besides, for drop/ok, the police should be the last action. As hardware can't configure peakrate/avrate/overhead, offload should not be supported if any of them is configured. Signed-off-by: Jianbo Liu Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/flow_offload.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 74f44d44abe3..92267d23083e 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -311,6 +311,12 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) return action->num_entries == 1; } +static inline bool flow_action_is_last_entry(const struct flow_action *action, + const struct flow_action_entry *entry) +{ + return entry == &action->entries[action->num_entries - 1]; +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; \ __i < (__actions)->num_entries; \ -- cgit v1.2.3 From e096242e1ee2e800df5e8d9a5508135902e8e1b5 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 25 Feb 2022 17:32:37 +0530 Subject: dt-bindings: pinctrl: k3: Introduce pinmux definitions for AM62 Add pinctrl macros for AM62x SoCs. These macro definitions are similar to that of previous platforms, but adding new definitions to avoid any naming confusions in the SoC dts files. checkpatch insists the following error exists: ERROR: Macros with complex values should be enclosed in parentheses However, we do not need parentheses enclosing the values for this macro as we do intend it to generate two separate values as has been done for other similar platforms. Signed-off-by: Suman Anna Signed-off-by: Vignesh Raghavendra Acked-by: Rob Herring Reviewed-by: Bryan Brattlof Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20220225120239.1303821-4-vigneshr@ti.com --- include/dt-bindings/pinctrl/k3.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/k3.h b/include/dt-bindings/pinctrl/k3.h index 63e038e36ca3..a5204ab91d3e 100644 --- a/include/dt-bindings/pinctrl/k3.h +++ b/include/dt-bindings/pinctrl/k3.h @@ -41,4 +41,7 @@ #define J721S2_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) #define J721S2_WKUP_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) +#define AM62X_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) +#define AM62X_MCU_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) + #endif -- cgit v1.2.3 From e65b831a1e191caff3fc0d06bc7019cdaf8f868e Mon Sep 17 00:00:00 2001 From: Qinghua Jin Date: Fri, 7 Jan 2022 10:22:58 +0800 Subject: nvme-fc: fix a typo subsytem -> subsystem Signed-off-by: Qinghua Jin Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index cb909edb76c4..5358a5facdee 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -721,7 +721,7 @@ enum { * * Fields with static values for the port. Initialized by the * port_info struct supplied to the registration call. 
- * @port_num: NVME-FC transport subsytem port number + * @port_num: NVME-FC transport subsystem port number * @node_name: FC WWNN for the port * @port_name: FC WWPN for the port * @private: pointer to memory allocated alongside the local port -- cgit v1.2.3 From bd83fe6f2cd2133beaac7c423fd36c3515048fc8 Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Thu, 3 Feb 2022 00:11:53 -0800 Subject: nvme: add verbose error logging Improves logging of NVMe errors. If NVME_VERBOSE_ERRORS is configured, a verbose description of the error is logged, otherwise only status codes/bits is logged. Signed-off-by: Chaitanya Kulkarni [kch]: fix several nits, cosmetics, and trim down code. Signed-off-by: Martin K. Petersen Signed-off-by: Alan Adamson Reviewed-by: Himanshu Madhani Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84b..1f946e5bf7c1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1636,6 +1636,7 @@ enum { NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, + NVME_SC_MORE = 0x2000, NVME_SC_DNR = 0x4000, }; -- cgit v1.2.3 From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Thu, 10 Feb 2022 11:07:55 +0530 Subject: nvme: add vectored-io support for user-passthrough Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing NVME_IOCTL_IO64_CMD ioctl except that it takes and array of iovecs and thus supports vectored I/O. - cmd.addr is base address of user iovec array - cmd.vec_cnt is count of iovec array elements This patch does not include vectored-variant for admin-commands as most of them are light on buffers and likely to have low invocation frequency. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/uapi/linux/nvme_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index d99b5a772698..b2e43185e3b5 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 { __u64 metadata; __u64 addr; __u32 metadata_len; - __u32 data_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; __u32 cdw10; __u32 cdw11; __u32 cdw12; @@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_RESCAN _IO('N', 0x46) #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From 86c2457a8e8112f16af8fd10a3e1dd7a302c3c3e Mon Sep 17 00:00:00 2001 From: Martin Belanger Date: Tue, 8 Feb 2022 14:33:46 -0500 Subject: nvme: expose cntrltype and dctype through sysfs TP8010 introduces the Discovery Controller Type attribute (dctype). The dctype is returned in the response to the Identify command. This patch exposes the dctype through the sysfs. Since the dctype depends on the Controller Type (cntrltype), another attribute of the Identify response, the patch also exposes the cntrltype as well. The dctype will only be displayed for discovery controllers. 
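As a usage sketch only (not part of this patch), user space could read the
new attributes roughly as follows; the sysfs location is assumed to be the
controller device directory, e.g. /sys/class/nvme/nvme0/:

    #include <stdio.h>

    /* Print cntrltype and, where present, dctype of the nvme0 controller. */
    static void show_attr(const char *path)
    {
            char buf[32];
            FILE *f = fopen(path, "r");

            if (f && fgets(buf, sizeof(buf), f))
                    printf("%s: %s", path, buf);
            if (f)
                    fclose(f);
    }

    int main(void)
    {
            show_attr("/sys/class/nvme/nvme0/cntrltype");
            /* dctype is only shown for discovery controllers, see above */
            show_attr("/sys/class/nvme/nvme0/dctype");
            return 0;
    }
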
A note about the naming of this attribute: Although TP8010 calls this attribute the Discovery Controller Type, note that the dctype is now part of the response to the Identify command for all controller types. I/O, Discovery, and Admin controllers all share the same Identify response PDU structure. Non-discovery controllers as well as pre-TP8010 discovery controllers will continue to set this field to 0 (which has always been the default for reserved bytes). Per TP8010, the value 0 now means "Discovery controller type is not reported" instead of "Reserved". One could argue that this definition is correct even for non-discovery controllers, and by extension, exposing it in the sysfs for non-discovery controllers is appropriate. Signed-off-by: Martin Belanger Reviewed-by: Chaitanya Kulkarni Reviewed-by: John Meneghini Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1f946e5bf7c1..9dbc3ef4daf7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -43,6 +43,12 @@ enum nvme_ctrl_type { NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; +enum nvme_dctype { + NVME_DCTYPE_NOT_REPORTED = 0, + NVME_DCTYPE_DDC = 1, /* Direct Discovery Controller */ + NVME_DCTYPE_CDC = 2, /* Central Discovery Controller */ +}; + /* Address Family codes for Discovery Log Page entry ADRFAM field */ enum { NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ @@ -320,7 +326,9 @@ struct nvme_id_ctrl { __le16 icdoff; __u8 ctrattr; __u8 msdbd; - __u8 rsvd1804[244]; + __u8 rsvd1804[2]; + __u8 dctype; + __u8 rsvd1807[241]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; -- cgit v1.2.3 From a5081bad2eac5108593ac36b6551201f0df9e897 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 26 Feb 2022 14:56:22 +0000 Subject: net: phylink: remove phylink_set_pcs() As all users of phylink_set_pcs() have now been updated to use the mac_select_pcs() method, it can be removed from the phylink kernel API and its functionality moved into phylink_major_config(). Removing phylink_set_pcs() gives us a single approach for attaching a PCS within phylink. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 9ef9b7047f19..223781622b33 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -532,7 +532,6 @@ void phylink_generic_validate(struct phylink_config *config, struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); -void phylink_set_pcs(struct phylink *, struct phylink_pcs *pcs); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From aa3766def7506e5d9bd6c8387dcfe3629eb2a1f2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 18 Jan 2022 14:58:30 +0200 Subject: habanalabs: expose number of user interrupts Currently we only expose to the user the ID of the first available user interrupt. To make user interrupts allocation truly dynamic, we need to also expose the number of user interrupts. 
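For illustration, a minimal user-space query might look like the sketch
below. It relies on the existing HL_IOCTL_INFO / HL_INFO_HW_IP_INFO
plumbing from this uapi header; the device node name is an assumption:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <misc/habanalabs.h>

    int main(void)
    {
            struct hl_info_hw_ip_info hw_ip;
            struct hl_info_args args;
            int fd, rc;

            fd = open("/dev/hl0", O_RDWR);          /* assumed device node */
            if (fd < 0)
                    return 1;

            memset(&args, 0, sizeof(args));
            memset(&hw_ip, 0, sizeof(hw_ip));
            args.op = HL_INFO_HW_IP_INFO;
            args.return_pointer = (uint64_t)(uintptr_t)&hw_ip;
            args.return_size = sizeof(hw_ip);

            rc = ioctl(fd, HL_IOCTL_INFO, &args);
            if (!rc)
                    printf("user interrupts available: %u\n",
                           hw_ip.number_of_user_interrupts);
            close(fd);
            return rc ? 1 : 0;
    }
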
Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 371dfc4243b3..12976f7a8d84 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -404,6 +404,8 @@ enum hl_server_type { * @cpucp_version: The CPUCP f/w version. * @card_name: The card name as passed by the f/w. * @dram_page_size: The DRAM physical page size. + * @number_of_user_interrupts: The number of interrupts that are available to the userspace + * application to use. Relevant for Gaudi2 and later. */ struct hl_info_hw_ip_info { __u64 sram_base_address; @@ -428,6 +430,9 @@ struct hl_info_hw_ip_info { __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; __u64 reserved2; __u64 dram_page_size; + __u32 reserved3; + __u16 number_of_user_interrupts; + __u16 pad2; }; struct hl_info_dram_usage { -- cgit v1.2.3 From 9158bf69e74f98fea6847cca93bbf33a589bebcd Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Thu, 23 Dec 2021 13:24:34 +0200 Subject: habanalabs: Timestamps buffers registration Timestamp registration API allows the user to register a timestamp record event which will make the driver set timestamp when CQ counter reaches the target value and write it to a specific location specified by the user. This is a non blocking API, unlike the wait_for_interrupt which is a blocking one. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 12976f7a8d84..e21db03196ae 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2020 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -30,6 +30,9 @@ */ #define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 +/* Max number of elements in timestamps registration buffers */ +#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ + /* * Goya queue Numbering * @@ -695,10 +698,12 @@ struct hl_cb_in { __u64 cb_handle; /* HL_CB_OP_* */ __u32 op; + /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that * will be allocated, regardless of this parameter's value, is PAGE_SIZE */ __u32 cb_size; + /* Context ID - Currently not in use */ __u32 ctx_id; /* HL_CB_FLAGS_* */ @@ -964,6 +969,7 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 #define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -1036,6 +1042,20 @@ struct hl_wait_cs_in { * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set */ __u64 cq_counters_offset; + + /* + * Timestamp_handle timestamps buffer handle. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_handle; + + /* + * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above. + * upon interrupt, if the cq reached the target value then driver will write + * timestamp to this offset. 
+ * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_offset; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -1082,6 +1102,14 @@ union hl_wait_cs_args { */ #define HL_MEM_OP_EXPORT_DMABUF_FD 5 +/* Opcode to create timestamps pool for user interrupts registration support + * The memory will be allocated by the kernel driver, A timestamp buffer which the user + * will get handle to it for mmap, and another internal buffer used by the + * driver for registration management + * The memory will be freed when the user closes the file descriptor(ctx close) + */ +#define HL_MEM_OP_TS_ALLOC 6 + /* Memory flags */ #define HL_MEM_CONTIGUOUS 0x1 #define HL_MEM_SHARED 0x2 @@ -1173,9 +1201,14 @@ struct hl_mem_in { * DMA-BUF file/FD flags. */ __u32 flags; + /* Context ID - Currently not in use */ __u32 ctx_id; - __u32 pad; + + /* number of timestamp elements + * used only when HL_MEM_OP_TS_ALLOC opcode + */ + __u32 num_of_elements; }; struct hl_mem_out { -- cgit v1.2.3 From 9349a321d327d08a4e91f9b6aca9c1f3a3a25306 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Feb 2022 15:20:08 +0200 Subject: habanalabs: use kernel-doc for memory ioctl documentation Re-format the comments for the memory ioctl structure to be in kernel-doc style. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 111 +++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index e21db03196ae..a2a953a91b5e 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1116,98 +1116,101 @@ union hl_wait_cs_args { #define HL_MEM_USERPTR 0x4 #define HL_MEM_FORCE_HINT 0x8 +/** + * structure hl_mem_in - structure that handle input args for memory IOCTL + * @union arg: union of structures to be used based on the input operation + * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions). + * @flags: flags for the memory operation (one of the HL_MEM_* definitions). + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags. + * @ctx_id: context ID - currently not in use. + * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode. + */ struct hl_mem_in { union { - /* HL_MEM_OP_ALLOC- allocate device memory */ + /** + * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) + * @mem_size: memory size to allocate + */ struct { - /* Size to alloc */ __u64 mem_size; } alloc; - /* HL_MEM_OP_FREE - free device memory */ + /** + * structure for free-ing device memory (used with the HL_MEM_OP_FREE op) + * @handle: handle returned from HL_MEM_OP_ALLOC + */ struct { - /* Handle returned from HL_MEM_OP_ALLOC */ __u64 handle; } free; - /* HL_MEM_OP_MAP - map device memory */ + /** + * structure for mapping device memory (used with the HL_MEM_OP_MAP op) + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @handle: handle returned from HL_MEM_OP_ALLOC. + */ struct { - /* - * Requested virtual address of mapped memory. 
- * The driver will try to map the requested region to - * this hint address, as long as the address is valid - * and not already mapped. The user should check the - * returned address of the IOCTL to make sure he got - * the hint address. Passing 0 here means that the - * driver will choose the address itself. - */ __u64 hint_addr; - /* Handle returned from HL_MEM_OP_ALLOC */ __u64 handle; } map_device; - /* HL_MEM_OP_MAP - map host memory */ + /** + * structure for mapping host memory (used with the HL_MEM_OP_MAP op) + * @host_virt_addr: address of allocated host memory. + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @size: size of allocated host memory. + */ struct { - /* Address of allocated host memory */ __u64 host_virt_addr; - /* - * Requested virtual address of mapped memory. - * The driver will try to map the requested region to - * this hint address, as long as the address is valid - * and not already mapped. The user should check the - * returned address of the IOCTL to make sure he got - * the hint address. Passing 0 here means that the - * driver will choose the address itself. - */ __u64 hint_addr; - /* Size of allocated host memory */ __u64 mem_size; } map_host; - /* HL_MEM_OP_MAP_BLOCK - map a hw block */ + /** + * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op) + * @block_addr:HW block address to map, a handle and size will be returned + * to the user and will be used to mmap the relevant block. + * only addresses from configuration space are allowed. + */ struct { - /* - * HW block address to map, a handle and size will be - * returned to the user and will be used to mmap the - * relevant block. Only addresses from configuration - * space are allowed. - */ __u64 block_addr; } map_block; - /* HL_MEM_OP_UNMAP - unmap host memory */ + /** + * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op) + * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP + */ struct { - /* Virtual address returned from HL_MEM_OP_MAP */ __u64 device_virt_addr; } unmap; - /* HL_MEM_OP_EXPORT_DMABUF_FD */ + /** + * structure for exporting DMABUF object (used with + * the HL_MEM_OP_EXPORT_DMABUF_FD op) + * @handle: handle returned from HL_MEM_OP_ALLOC. + * in Gaudi, where we don't have MMU for the device memory, the + * driver expects a physical address (instead of a handle) in the + * device memory space. + * @mem_size: size of memory allocation. Relevant only for GAUDI + */ struct { - /* Handle returned from HL_MEM_OP_ALLOC. In Gaudi, - * where we don't have MMU for the device memory, the - * driver expects a physical address (instead of - * a handle) in the device memory space. - */ __u64 handle; - /* Size of memory allocation. Relevant only for GAUDI */ __u64 mem_size; } export_dmabuf_fd; }; - /* HL_MEM_OP_* */ __u32 op; - /* HL_MEM_* flags. - * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the - * DMA-BUF file/FD flags. 
- */ __u32 flags; - - /* Context ID - Currently not in use */ __u32 ctx_id; - - /* number of timestamp elements - * used only when HL_MEM_OP_TS_ALLOC opcode - */ __u32 num_of_elements; }; -- cgit v1.2.3 From f23f280277d5a701de99c6652623b6bf8801c534 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Feb 2022 15:22:00 +0200 Subject: habanalabs: allow user to set allocation page size In future ASICs the MMU will be able to work with multiple page sizes, thus a new flag is added to allow the user to set the requested page size. This flag is added since the whole DRAM is allocated for the user and the user also should be familiar with the memory usage use case. As such, the user may choose to "over allocate" memory in favor of performance (for instance- large page allocations covers more memory in less TLB entries). For example: say available page sizes are of 1MB and 32MB. If user wants to allocate 40MB the user can either set page size to 1MB and allocate the exact amount of memory (but will result in 40 TLB entries) or the user can use 32MB pages, "waste" 8MB of physical memory but occupy only 2 TLB entries. Note that this feature will be available only to ASIC that supports multiple DRAM page sizes. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a2a953a91b5e..1d6b4f0c4159 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -1130,9 +1130,12 @@ struct hl_mem_in { /** * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) * @mem_size: memory size to allocate + * @page_size: page size to use on allocation. when the value is 0 the default page + * size will be taken. */ struct { __u64 mem_size; + __u64 page_size; } alloc; /** -- cgit v1.2.3 From 989192ac6ad54acccd5dd1c058055dacd6b94b30 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:41 +0800 Subject: iommu/vt-d: Remove guest pasid related callbacks The guest pasid related callbacks are not called in the tree. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 10 ---------- include/linux/intel-svm.h | 12 ------------ 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 69230fd695ea..beaacb589abd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -525,12 +525,6 @@ struct context_entry { */ #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. 
- */ -#define DOMAIN_FLAG_NESTING_MODE BIT(2) - struct dmar_domain { int nid; /* node id */ @@ -765,9 +759,6 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); @@ -795,7 +786,6 @@ struct intel_svm { unsigned int flags; u32 pasid; - int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; }; #else diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 1b73bab7eeff..b3b125b332aa 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -25,17 +25,5 @@ * do such IOTLB flushes automatically. */ #define SVM_FLAG_SUPERVISOR_MODE BIT(0) -/* - * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest - * processes. Compared to the host bind, the primary differences are: - * 1. mm life cycle management - * 2. fault reporting - */ -#define SVM_FLAG_GUEST_MODE BIT(1) -/* - * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, - * which requires guest and host PASID translation at both directions. - */ -#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 0c9f178778919bb500443f340647b44227778fb2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:42 +0800 Subject: iommu: Remove guest pasid related interfaces and definitions The guest pasid related uapi interfaces and definitions are not referenced anywhere in the tree. We've also reached a consensus to replace them with a new iommufd design. Remove them to avoid dead code. 
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 44 ----------- include/uapi/linux/iommu.h | 181 --------------------------------------------- 2 files changed, 225 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index de0c57a567c8..cde1d0aad60f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -229,9 +229,6 @@ struct iommu_iotlb_gather { * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response - * @cache_invalidate: invalidate translation caches - * @sva_bind_gpasid: bind guest pasid and mm - * @sva_unbind_gpasid: unbind guest pasid and mm * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain @@ -301,12 +298,6 @@ struct iommu_ops { int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); - int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info); - int (*sva_bind_gpasid)(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); - - int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); int (*def_domain_type)(struct device *dev); @@ -421,14 +412,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - void __user *uinfo); - -extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); -extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); @@ -1051,33 +1034,6 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int -iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t pasid) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 59178fc229ca..65d8b0234f69 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -158,185 +158,4 @@ struct iommu_page_response { __u32 code; }; -/* defines the granularity of the invalidation */ -enum iommu_inv_granularity { - IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ - IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ - IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ - 
IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -}; - -/** - * struct iommu_inv_addr_info - Address Selective Invalidation Structure - * - * @flags: indicates the granularity of the address-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If ARCHID bit is set, @archid is populated and the invalidation relates - * to cache entries tagged with this architecture specific ID and matching - * the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - If neither PASID or ARCHID is set, global addr invalidation applies. - * - The LEAF flag indicates whether only the leaf PTE caching needs to be - * invalidated and other paging structure caches can be preserved. - * @pasid: process address space ID - * @archid: architecture-specific ID - * @addr: first stage/level input address - * @granule_size: page/block size of the mapping in bytes - * @nb_granules: number of contiguous granules to be invalidated - */ -struct iommu_inv_addr_info { -#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) - __u32 flags; - __u32 archid; - __u64 pasid; - __u64 addr; - __u64 granule_size; - __u64 nb_granules; -}; - -/** - * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure - * - * @flags: indicates the granularity of the PASID-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If the ARCHID bit is set, the @archid is populated and the invalidation - * relates to cache entries tagged with this architecture specific ID and - * matching the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - At least one of PASID or ARCHID must be set. - * @pasid: process address space ID - * @archid: architecture-specific ID - */ -struct iommu_inv_pasid_info { -#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) - __u32 flags; - __u32 archid; - __u64 pasid; -}; - -/** - * struct iommu_cache_invalidate_info - First level/stage invalidation - * information - * @argsz: User filled size of this data - * @version: API version of this structure - * @cache: bitfield that allows to select which caches to invalidate - * @granularity: defines the lowest granularity used for the invalidation: - * domain > PASID > addr - * @padding: reserved for future use (should be zero) - * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID - * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR - * - * Not all the combinations of cache/granularity are valid: - * - * +--------------+---------------+---------------+---------------+ - * | type / | DEV_IOTLB | IOTLB | PASID | - * | granularity | | | cache | - * +==============+===============+===============+===============+ - * | DOMAIN | N/A | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | PASID | Y | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | ADDR | Y | Y | N/A | - * +--------------+---------------+---------------+---------------+ - * - * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than - * @version and @cache. 
- * - * If multiple cache types are invalidated simultaneously, they all - * must support the used granularity. - */ -struct iommu_cache_invalidate_info { - __u32 argsz; -#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; -/* IOMMU paging structure cache */ -#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -#define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; -}; - -/** - * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest - * SVA binding. - * - * @flags: VT-d PASID table entry attributes - * @pat: Page attribute table data to compute effective memory type - * @emt: Extended memory type - * - * Only guest vIOMMU selectable and effective options are passed down to - * the host IOMMU. - */ -struct iommu_gpasid_bind_data_vtd { -#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) - __u64 flags; - __u32 pat; - __u32 emt; -}; - -#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ - IOMMU_SVA_VTD_GPASID_EMTE | \ - IOMMU_SVA_VTD_GPASID_PCD | \ - IOMMU_SVA_VTD_GPASID_PWT) - -/** - * struct iommu_gpasid_bind_data - Information about device and guest PASID binding - * @argsz: User filled size of this data - * @version: Version of this data structure - * @format: PASID table entry format - * @flags: Additional information on guest bind request - * @gpgd: Guest page directory base of the guest mm to bind - * @hpasid: Process address space ID used for the guest mm in host IOMMU - * @gpasid: Process address space ID used for the guest mm in guest IOMMU - * @addr_width: Guest virtual address width - * @padding: Reserved for future use (should be zero) - * @vtd: Intel VT-d specific data - * - * Guest to host PASID mapping can be an identity or non-identity, where guest - * has its own PASID space. For non-identify mapping, guest to host PASID lookup - * is needed when VM programs guest PASID into an assigned device. VMM may - * trap such PASID programming then request host IOMMU driver to convert guest - * PASID to host PASID based on this bind data. - */ -struct iommu_gpasid_bind_data { - __u32 argsz; -#define IOMMU_GPASID_BIND_VERSION_1 1 - __u32 version; -#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -#define IOMMU_PASID_FORMAT_LAST 2 - __u32 format; - __u32 addr_width; -#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ - __u64 flags; - __u64 gpgd; - __u64 hpasid; - __u64 gpasid; - __u8 padding[8]; - /* Vendor specific data */ - union { - struct iommu_gpasid_bind_data_vtd vtd; - } vendor; -}; - #endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From 241469685d8d54075f12a6f3eb0628f85e13ff37 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:43 +0800 Subject: iommu/vt-d: Remove aux-domain related callbacks The aux-domain related callbacks are not called in the tree. 
Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index beaacb589abd..5cfda90b2cca 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -542,7 +542,6 @@ struct dmar_domain { u8 iommu_snooping: 1; /* indicate snooping control feature */ struct list_head devices; /* all devices' list */ - struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -557,11 +556,6 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ - u32 default_pasid; /* - * The default pasid used for non-SVM - * traffic on mediated devices. - */ - struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -614,21 +608,11 @@ struct intel_iommu { void *perf_statistic; }; -/* Per subdevice private data */ -struct subdev_domain_info { - struct list_head link_phys; /* link to phys device siblings */ - struct list_head link_domain; /* link to domain siblings */ - struct device *pdev; /* physical device derived from */ - struct dmar_domain *domain; /* aux-domain */ - int users; /* user count */ -}; - /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -639,7 +623,6 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; - u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ -- cgit v1.2.3 From 8652d875939b6a1fe6d5c93cf4fb91df61cda5a7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:44 +0800 Subject: iommu: Remove aux-domain related interfaces and iommu_ops The aux-domain related interfaces and iommu_ops are not referenced anywhere in the tree. We've also reached a consensus to redesign it based the new iommufd framework. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index cde1d0aad60f..9983a01373b2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -144,7 +144,6 @@ struct iommu_resv_region { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally * enabling %IOMMU_DEV_FEAT_SVA requires @@ -157,7 +156,6 @@ struct iommu_resv_region { * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). 
*/ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; @@ -223,8 +221,6 @@ struct iommu_iotlb_gather { * @dev_has/enable/disable_feat: per device entries to check/enable/disable * iommu specific features. * @dev_feat_enabled: check enabled feature - * @aux_attach/detach_dev: aux-domain specific attach/detach entries. - * @aux_get_pasid: get the pasid given an aux-domain * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle @@ -285,11 +281,6 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - /* Aux-domain specific attach/detach entries */ - int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, void *drvdata); void (*sva_unbind)(struct iommu_sva *handle); @@ -655,9 +646,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -1002,23 +990,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline int -iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - -static inline void -iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ -} - -static inline int -iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { -- cgit v1.2.3 From 71fe30698dc3be6fd277c49c0c7b220c7ae92ffd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:45 +0800 Subject: iommu: Remove apply_resv_region The apply_resv_region callback in iommu_ops was introduced to reserve an IOVA range in the given DMA domain when the IOMMU driver manages the IOVA by itself. As all drivers converted to use dma-iommu in the core, there's no driver using this anymore. Remove it to avoid dead code. 
Suggested-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9983a01373b2..9ffa2e88f3d0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -214,7 +214,6 @@ struct iommu_iotlb_gather { * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device - * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -268,9 +267,6 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); void (*put_resv_regions)(struct device *dev, struct list_head *list); - void (*apply_resv_region)(struct device *dev, - struct iommu_domain *domain, - struct iommu_resv_region *region); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); -- cgit v1.2.3 From 3f6634d997dba8140b3a7cba01776b9638d70dff Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:47 +0800 Subject: iommu: Use right way to retrieve iommu_ops The common iommu_ops is hooked to both device and domain. When a helper has both device and domain pointer, the way to get the iommu_ops looks messy in iommu core. This sorts out the way to get iommu_ops. The device related helpers go through device pointer, while the domain related ones go through domain pointer. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9ffa2e88f3d0..90f1b5e3809d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -381,6 +381,17 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } +static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) +{ + /* + * Assume that valid ops must be installed if iommu_probe_device() + * has succeeded. The device ops are essentially for internal use + * within the IOMMU subsystem itself, so we should be able to trust + * ourselves not to misuse the helper. + */ + return dev->iommu->iommu_dev->ops; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ -- cgit v1.2.3 From 41bb23e70b50b89b6137cbecd37009d782454860 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:48 +0800 Subject: iommu: Remove unused argument in is_attach_deferred The is_attach_deferred iommu_ops callback is a device op. The domain argument is unnecessary and never used. Remove it to make code clean. 
Suggested-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 90f1b5e3809d..1ded6076a75c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -269,7 +269,7 @@ struct iommu_ops { void (*put_resv_regions)(struct device *dev, struct list_head *list); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); - bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); + bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); -- cgit v1.2.3 From 9a630a4b41a2639b65d024a5d2d88ed3ecca130a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:49 +0800 Subject: iommu: Split struct iommu_ops Move the domain specific operations out of struct iommu_ops into a new structure that only has domain specific operations. This solves the problem of needing to know if the method vector for a given operation needs to be retrieved from the device or the domain. Logically the domain ops are the ones that make sense for external subsystems and endpoint drivers to use, while device ops, with the sole exception of domain_alloc, are IOMMU API internals. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 91 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ded6076a75c..9208eca4b0d1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -37,6 +37,7 @@ struct iommu_group; struct bus_type; struct device; struct iommu_domain; +struct iommu_domain_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -88,7 +89,7 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; - const struct iommu_ops *ops; + const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ iommu_fault_handler_t handler; void *handler_token; @@ -192,26 +193,11 @@ struct iommu_iotlb_gather { * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @domain_alloc: allocate iommu domain - * @domain_free: free iommu domain - * @attach_dev: attach device to an iommu domain - * @detach_dev: detach device from an iommu domain - * @map: map a physically contiguous memory region to an iommu domain - * @map_pages: map a physically contiguous set of pages of the same size to - * an iommu domain. 
- * @unmap: unmap a physically contiguous memory region from an iommu domain - * @unmap_pages: unmap a number of pages of the same size from an iommu domain - * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain - * @iotlb_sync_map: Sync mappings created recently using @map to the hardware - * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush - * queue - * @iova_to_phys: translate iova to physical address * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain * @device_group: find iommu group for a particular device - * @enable_nesting: Enable nesting - * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -228,6 +214,7 @@ struct iommu_iotlb_gather { * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting + * @default_domain_ops: the default ops for domains * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -236,33 +223,11 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - void (*domain_free)(struct iommu_domain *); - int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); - int (*map_pages)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t pgsize, size_t pgcount, - int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); - size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, - size_t pgsize, size_t pgcount, - struct iommu_iotlb_gather *iotlb_gather); - void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); - void (*iotlb_sync)(struct iommu_domain *domain, - struct iommu_iotlb_gather *iotlb_gather); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); - int (*enable_nesting)(struct iommu_domain *domain); - int (*set_pgtable_quirks)(struct iommu_domain *domain, - unsigned long quirks); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -288,10 +253,60 @@ struct iommu_ops { int (*def_domain_type)(struct device *dev); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; }; +/** + * struct iommu_domain_ops - domain specific operations + * @attach_dev: attach an iommu domain to a device + * @detach_dev: detach an iommu domain from a device + * @map: map a physically contiguous memory region to an iommu domain + * @map_pages: map a physically contiguous set of pages of the 
same size to + * an iommu domain. + * @unmap: unmap a physically contiguous memory region from an iommu domain + * @unmap_pages: unmap a number of pages of the same size from an iommu domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain + * @iotlb_sync_map: Sync mappings created recently using @map to the hardware + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * queue + * @iova_to_phys: translate iova to physical address + * @enable_nesting: Enable nesting + * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) + * @free: Release the domain after use. + */ +struct iommu_domain_ops { + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); + size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather); + + void (*flush_iotlb_all)(struct iommu_domain *domain); + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); + void (*iotlb_sync)(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather); + + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + dma_addr_t iova); + + int (*enable_nesting)(struct iommu_domain *domain); + int (*set_pgtable_quirks)(struct iommu_domain *domain, + unsigned long quirks); + + void (*free)(struct iommu_domain *domain); +}; + /** * struct iommu_device - IOMMU core representation of one IOMMU hardware * instance -- cgit v1.2.3 From 6bb477df04366e0f69dd2f49e1ae1099069326bc Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Thu, 17 Feb 2022 22:12:34 +0800 Subject: spi: use specific last_cs instead of last_cs_enable Commit d40f0b6f2e21 instroduced last_cs_enable to avoid setting chipselect if it's not necessary, but it also introduces a bug. The chipselect may not be set correctly on multi-device SPI busses. The reason is that we can't judge the chipselect by bool last_cs_enable, since chipselect may be modified after other devices were accessed. So we should record the specific state of chipselect in case of confusion. Signed-off-by: Yun Zhou Link: https://lore.kernel.org/r/20220217141234.72737-1-yun.zhou@windriver.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 579d71cdf6fa..7d005fa4631c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -370,7 +370,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message * @cur_msg_mapped: message has been mapped for DMA - * @last_cs_enable: was enable true on the last call to set_cs. + * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip + * selected * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. 
* @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy @@ -603,7 +604,7 @@ struct spi_controller { bool auto_runtime_pm; bool cur_msg_prepared; bool cur_msg_mapped; - bool last_cs_enable; + char last_cs; bool last_cs_mode_high; bool fallback; struct completion xfer_completion; -- cgit v1.2.3 From 20f01f163203666010ee1560852590a0c0572726 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Jan 2022 13:59:38 -0800 Subject: blk-crypto: show crypto capabilities in sysfs Add sysfs files that expose the inline encryption capabilities of request queues: /sys/block/$disk/queue/crypto/max_dun_bits /sys/block/$disk/queue/crypto/modes/$mode /sys/block/$disk/queue/crypto/num_keyslots Userspace can use these new files to decide what encryption settings to use, or whether to use inline encryption at all. This also brings the crypto capabilities in line with the other queue properties, which are already discoverable via the queue directory in sysfs. Design notes: - Place the new files in a new subdirectory "crypto" to group them together and to avoid complicating the main "queue" directory. This also makes it possible to replace "crypto" with a symlink later if we ever make the blk_crypto_profiles into real kobjects (see below). - It was necessary to define a new kobject that corresponds to the crypto subdirectory. For now, this kobject just contains a pointer to the blk_crypto_profile. Note that multiple queues (and hence multiple such kobjects) may refer to the same blk_crypto_profile. An alternative design would more closely match the current kernel data structures: the blk_crypto_profile could be a kobject itself, located directly under the host controller device's kobject, while /sys/block/$disk/queue/crypto would be a symlink to it. I decided not to do that for now because it would require a lot more changes, such as no longer embedding blk_crypto_profile in other structures, and also because I'm not sure we can rule out moving the crypto capabilities into 'struct queue_limits' in the future. (Even if multiple queues share the same crypto engine, maybe the supported data unit sizes could differ due to other queue properties.) It would also still be possible to switch to that design later without breaking userspace, by replacing the directory with a symlink. - Use "max_dun_bits" instead of "max_dun_bytes". Currently, the kernel internally stores this value in bytes, but that's an implementation detail. It probably makes more sense to talk about this value in bits, and choosing bits is more future-proof. - "modes" is a sub-subdirectory, since there may be multiple supported crypto modes, sysfs is supposed to have one value per file, and it makes sense to group all the mode files together. - Each mode had to be named. The crypto API names like "xts(aes)" are not appropriate because they don't specify the key size. Therefore, I assigned new names. The exact names chosen are arbitrary, but they happen to match the names used in log messages in fs/crypto/. - The "num_keyslots" file is a bit different from the others in that it is only useful to know for performance reasons. However, it's included as it can still be useful. For example, a user might not want to use inline encryption if there aren't very many keyslots. 
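As a usage illustration (not part of this patch), a user-space reader could
consume the new files like the sketch below; the disk name is an assumption:

    #include <stdio.h>

    /* Return the value of a sysfs file, or -1 if it does not exist
     * (e.g. no inline encryption support on this queue). */
    static long read_sysfs_val(const char *path)
    {
            unsigned long val = 0;
            FILE *f = fopen(path, "r");

            if (!f)
                    return -1;
            if (fscanf(f, "%lu", &val) != 1)
                    val = 0;
            fclose(f);
            return (long)val;
    }

    int main(void)
    {
            long dun_bits = read_sysfs_val("/sys/block/sda/queue/crypto/max_dun_bits");
            long keyslots = read_sysfs_val("/sys/block/sda/queue/crypto/num_keyslots");

            if (dun_bits < 0)
                    printf("sda: no inline encryption capabilities\n");
            else
                    printf("sda: max_dun_bits=%ld num_keyslots=%ld\n",
                           dun_bits, keyslots);
            return 0;
    }
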
Reviewed-by: Hannes Reinecke Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220124215938.2769-4-ebiggers@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f..e19947d84f12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -413,6 +413,7 @@ struct request_queue { #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; + struct kobject *crypto_kobject; #endif unsigned int rq_timeout; -- cgit v1.2.3 From 25d490eb46486e88c16e64d9eb7cfd33a642d596 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:40 +0100 Subject: ARM: 9172/1: amba: Cleanup amba pclk operation There is no user about amba_pclk_[un]prepare() besides pl330.c, directly use clk_[un]prepare(). After this, all the function about amba pclk operation, enable, disable, [un]prepare could be killed. Acked-by: Vinod Koul Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6c7f47846971..09174970b855 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -121,26 +121,6 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -static inline int amba_pclk_enable(struct amba_device *dev) -{ - return clk_enable(dev->pclk); -} - -static inline void amba_pclk_disable(struct amba_device *dev) -{ - clk_disable(dev->pclk); -} - -static inline int amba_pclk_prepare(struct amba_device *dev) -{ - return clk_prepare(dev->pclk); -} - -static inline void amba_pclk_unprepare(struct amba_device *dev) -{ - clk_unprepare(dev->pclk); -} - /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) -- cgit v1.2.3 From dacf3ca134d0dc105caee77651a349a86bd77456 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:41 +0100 Subject: ARM: 9173/1: amba: kill amba_find_match() There is no one use amba_find_match(), kill it. 
Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 09174970b855..6562f543c3e0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -117,7 +117,6 @@ void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -- cgit v1.2.3 From 1a93b82c59ab45f2d9f14c47f09d2e341ff02381 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Feb 2022 07:07:08 -0500 Subject: NFS: constify nfs_server_capable() and nfs_have_writebacks() Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 72a732a5103c..6e10725887d1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -363,7 +363,7 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_unlock(&inode->i_lock); } -static inline int nfs_server_capable(struct inode *inode, int cap) +static inline int nfs_server_capable(const struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; } @@ -587,12 +587,11 @@ extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); -static inline int -nfs_have_writebacks(struct inode *inode) +static inline bool nfs_have_writebacks(const struct inode *inode) { if (S_ISREG(inode->i_mode)) return atomic_long_read(&NFS_I(inode)->nrequests) != 0; - return 0; + return false; } /* -- cgit v1.2.3 From d07c9ad622474616e94572e59e725c2c4a494fb4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 11 Jan 2022 12:43:55 -0500 Subject: tracing: Introduce helpers to safely handle dynamic-sized sockaddrs Enable a struct sockaddr to be stored in a trace record as a dynamically-sized field. The common cases are AF_INET and AF_INET6 which are different sizes, and are vastly smaller than a struct sockaddr_storage. These are safer because, when used properly, the size of the sockaddr destination field in each trace record is now guaranteed to be the same as the source address that is being copied into it. 
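A hedged usage sketch showing how the three new helpers fit together in a TRACE_EVENT definition; the real conversions follow in later patches, and the event and argument names here are made up:

TRACE_EVENT(example_connect,
	TP_PROTO(const struct sockaddr *sap, size_t salen),

	TP_ARGS(sap, salen),

	TP_STRUCT__entry(
		__sockaddr(addr, salen)		/* reserves just salen bytes */
	),

	TP_fast_assign(
		__assign_sockaddr(addr, sap, salen);
	),

	TP_printk("addr=%pISpc", __get_sockaddr(addr))
);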
Link: https://lore.kernel.org/all/164182978641.8391.8277203495236105391.stgit@bazille.1015granger.net/ Signed-off-by: Chuck Lever --- include/trace/bpf_probe.h | 6 +++++ include/trace/perf.h | 6 +++++ include/trace/trace_events.h | 55 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 7660a7846586..6a13220d2d27 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ ((void *)(&__entry->__rel_loc_##field) + \ @@ -37,6 +40,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __perf_count #define __perf_count(c) (c) diff --git a/include/trace/perf.h b/include/trace/perf.h index 5d48c46a3008..5800d13146c3 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ ((void *)__entry + \ @@ -38,6 +41,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 3d29919045af..7c86cc541c7a 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,6 +108,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; @@ -120,6 +123,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) 
args @@ -212,11 +218,14 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) u32 item; @@ -230,6 +239,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct trace_event_data_offsets_##call { \ @@ -349,6 +361,12 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -518,6 +536,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(_type, _item, _len) { \ .type = "__rel_loc " #_type "[]", .name = #_item, \ @@ -533,6 +554,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -624,6 +648,12 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -788,6 +818,15 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#define __assign_sockaddr(dest, src, len) \ + memcpy(__get_dynamic_array(dest), src, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __entry->__rel_loc_##item = __data_offsets.item; @@ -819,6 +858,16 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_rel_bitmask(dst, src, nr_bits) \ memcpy(__get_rel_bitmask(dst), (src), 
__bitmask_size_in_bytes(nr_bits)) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + +#define __assign_rel_sockaddr(dest, src, len) \ + memcpy(__get_rel_dynamic_array(dest), src, len) + + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -883,10 +932,12 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __get_sockaddr #undef __get_rel_dynamic_array #undef __get_rel_dynamic_array_len #undef __get_rel_str #undef __get_rel_bitmask +#undef __get_rel_sockaddr #undef __print_array #undef __print_hex_dump -- cgit v1.2.3 From 26ce14e77a827fd73a650cffea4db7ddcc62ebc4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 24 Jan 2022 11:31:45 -0500 Subject: SUNRPC: Improve sockaddr handling in the svc_xprt_create_error trace point Clean up: Use the new __sockaddr field to record the socket address. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 29982d60b68a..39ed91dca5ab 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1774,18 +1774,18 @@ TRACE_EVENT(svc_xprt_create_err, __field(long, error) __string(program, program) __string(protocol, protocol) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, salen) ), TP_fast_assign( __entry->error = PTR_ERR(xprt); __assign_str(program, program); __assign_str(protocol, protocol); - memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); + __assign_sockaddr(addr, sap, salen); ), TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", - __entry->addr, __get_str(program), __get_str(protocol), + __get_sockaddr(addr), __get_str(program), __get_str(protocol), __entry->error) ); -- cgit v1.2.3 From aca3ed791553f1f9f994273a12b30c35b40f2769 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 17:07:44 -0400 Subject: SUNRPC: Same as SVC_RQST_ENDPOINT, but without the xid Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 112 ++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 39ed91dca5ab..84f585319695 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1789,50 +1789,99 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error) ); +#define SVC_XPRT_ENDPOINT_FIELDS(x) \ + __sockaddr(server, (x)->xpt_locallen) \ + __sockaddr(client, (x)->xpt_remotelen) \ + __field(unsigned long, flags) \ + __field(unsigned int, netns_ino) + +#define SVC_XPRT_ENDPOINT_ASSIGNMENTS(x) \ + do { \ + __assign_sockaddr(server, &(x)->xpt_local, \ + (x)->xpt_locallen); \ + __assign_sockaddr(client, &(x)->xpt_remote, \ + (x)->xpt_remotelen); \ + __entry->flags = (x)->xpt_flags; \ + __entry->netns_ino = (x)->xpt_net->ns.inum; \ + } while (0) + +#define SVC_XPRT_ENDPOINT_FORMAT \ + "server=%pISpc client=%pISpc flags=%s" + +#define SVC_XPRT_ENDPOINT_VARARGS \ + __get_sockaddr(server), __get_sockaddr(client), \ + show_svc_xprt_flags(__entry->flags) + TRACE_EVENT(svc_xprt_enqueue, - TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), + TP_PROTO( + const struct svc_xprt *xprt, + const struct svc_rqst *rqst + ), 
TP_ARGS(xprt, rqst), TP_STRUCT__entry( + SVC_XPRT_ENDPOINT_FIELDS(xprt) + __field(int, pid) - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); + __entry->pid = rqst? rqst->rq_task->pid : 0; - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("addr=%s pid=%d flags=%s", __get_str(addr), - __entry->pid, show_svc_xprt_flags(__entry->flags)) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", + SVC_XPRT_ENDPOINT_VARARGS, __entry->pid) +); + +TRACE_EVENT(svc_xprt_dequeue, + TP_PROTO( + const struct svc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) + + __field(unsigned long, wakeup) + ), + + TP_fast_assign( + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + + __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_qtime)); + ), + + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) ); DECLARE_EVENT_CLASS(svc_xprt_event, - TP_PROTO(struct svc_xprt *xprt), + TP_PROTO( + const struct svc_xprt *xprt + ), TP_ARGS(xprt), TP_STRUCT__entry( - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) + SVC_XPRT_ENDPOINT_FIELDS(xprt) ), TP_fast_assign( - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); ), - TP_printk("addr=%s flags=%s", __get_str(addr), - show_svc_xprt_flags(__entry->flags)) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS) ); #define DEFINE_SVC_XPRT_EVENT(name) \ DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \ TP_PROTO( \ - struct svc_xprt *xprt \ + const struct svc_xprt *xprt \ ), \ TP_ARGS(xprt)) @@ -1850,44 +1899,25 @@ TRACE_EVENT(svc_xprt_accept, TP_ARGS(xprt, service), TP_STRUCT__entry( - __string(addr, xprt->xpt_remotebuf) + SVC_XPRT_ENDPOINT_FIELDS(xprt) + __string(protocol, xprt->xpt_class->xcl_name) __string(service, service) ), TP_fast_assign( - __assign_str(addr, xprt->xpt_remotebuf); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); + __assign_str(protocol, xprt->xpt_class->xcl_name); __assign_str(service, service); ), - TP_printk("addr=%s protocol=%s service=%s", - __get_str(addr), __get_str(protocol), __get_str(service) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " protocol=%s service=%s", + SVC_XPRT_ENDPOINT_VARARGS, + __get_str(protocol), __get_str(service) ) ); -TRACE_EVENT(svc_xprt_dequeue, - TP_PROTO(struct svc_rqst *rqst), - - TP_ARGS(rqst), - - TP_STRUCT__entry( - __field(unsigned long, flags) - __field(unsigned long, wakeup) - __string(addr, rqst->rq_xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->flags = rqst->rq_xprt->xpt_flags; - __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr), - show_svc_xprt_flags(__entry->flags), __entry->wakeup) -); - TRACE_EVENT(svc_wake_up, TP_PROTO(int pid), -- cgit v1.2.3 From 70a60cbfb613d8f6ffd1d9ade187d0a868066500 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 13:47:24 -0400 Subject: SUNRPC: Record endpoint information in trace log To make server-side trace events more useful in container-ized environments, capture not just the remote's IP address, but the local IP address and network namespace as well. 
Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 126 ++++++++++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 84f585319695..ab8ae1f6ba84 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1625,26 +1625,53 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); { SVC_PENDING, "SVC_PENDING" }, \ { SVC_COMPLETE, "SVC_COMPLETE" }) +#define SVC_RQST_ENDPOINT_FIELDS(r) \ + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) + +#define SVC_RQST_ENDPOINT_ASSIGNMENTS(r) \ + do { \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ + __entry->netns_ino = xprt->xpt_net->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ + } while (0) + +#define SVC_RQST_ENDPOINT_FORMAT \ + "xid=0x%08x server=%pISpc client=%pISpc" + +#define SVC_RQST_ENDPOINT_VARARGS \ + __entry->xid, __get_sockaddr(server), __get_sockaddr(client) + TRACE_EVENT(svc_authenticate, TP_PROTO(const struct svc_rqst *rqst, int auth_res), TP_ARGS(rqst, auth_res), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(unsigned long, svc_status) __field(unsigned long, auth_stat) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->svc_status = auth_res; __entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat); ), - TP_printk("xid=0x%08x auth_res=%s auth_stat=%s", - __entry->xid, svc_show_status(__entry->svc_status), - rpc_show_auth_stat(__entry->auth_stat)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT + " auth_res=%s auth_stat=%s", + SVC_RQST_ENDPOINT_VARARGS, + svc_show_status(__entry->svc_status), + rpc_show_auth_stat(__entry->auth_stat)) ); TRACE_EVENT(svc_process, @@ -1680,7 +1707,6 @@ TRACE_EVENT(svc_process, ); DECLARE_EVENT_CLASS(svc_rqst_event, - TP_PROTO( const struct svc_rqst *rqst ), @@ -1688,20 +1714,20 @@ DECLARE_EVENT_CLASS(svc_rqst_event, TP_ARGS(rqst), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(unsigned long, flags) - __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->flags = rqst->rq_flags; - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x flags=%s", - __get_str(addr), __entry->xid, - show_rqstp_flags(__entry->flags)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT " flags=%s", + SVC_RQST_ENDPOINT_VARARGS, + show_rqstp_flags(__entry->flags)) ); #define DEFINE_SVC_RQST_EVENT(name) \ DEFINE_EVENT(svc_rqst_event, svc_##name, \ @@ -1714,34 +1740,63 @@ DEFINE_SVC_RQST_EVENT(defer); DEFINE_SVC_RQST_EVENT(drop); DECLARE_EVENT_CLASS(svc_rqst_status, - - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO( + const struct svc_rqst *rqst, + int status + ), TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(int, status) __field(unsigned long, flags) - __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->status = status; __entry->flags = rqst->rq_flags; - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%s 
xid=0x%08x status=%d flags=%s", - __get_str(addr), __entry->xid, - __entry->status, show_rqstp_flags(__entry->flags)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT " status=%d flags=%s", + SVC_RQST_ENDPOINT_VARARGS, + __entry->status, show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_status, svc_send, - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO(const struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); +TRACE_EVENT(svc_stats_latency, + TP_PROTO( + const struct svc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + SVC_RQST_ENDPOINT_FIELDS(rqst) + + __field(unsigned long, execute) + __string(procedure, svc_proc_name(rqst)) + ), + + TP_fast_assign( + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + + __entry->execute = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_stime)); + __assign_str(procedure, svc_proc_name(rqst)); + ), + + TP_printk(SVC_RQST_ENDPOINT_FORMAT " proc=%s execute-us=%lu", + SVC_RQST_ENDPOINT_VARARGS, + __get_str(procedure), __entry->execute) +); + #define show_svc_xprt_flags(flags) \ __print_flags(flags, "|", \ { (1UL << XPT_BUSY), "XPT_BUSY"}, \ @@ -1952,31 +2007,6 @@ TRACE_EVENT(svc_alloc_arg_err, TP_printk("pages=%u", __entry->pages) ); -TRACE_EVENT(svc_stats_latency, - TP_PROTO(const struct svc_rqst *rqst), - - TP_ARGS(rqst), - - TP_STRUCT__entry( - __field(u32, xid) - __field(unsigned long, execute) - __string(procedure, svc_proc_name(rqst)) - __string(addr, rqst->rq_xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->execute = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_stime)); - __assign_str(procedure, svc_proc_name(rqst)); - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu", - __get_str(addr), __entry->xid, __get_str(procedure), - __entry->execute) -); - DECLARE_EVENT_CLASS(svc_deferred_event, TP_PROTO( const struct svc_deferred_req *dr -- cgit v1.2.3 From a9ff2e99e9fa501ec965da03c18a5422b37a2f44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 10:17:59 -0500 Subject: SUNRPC: Remove the .svo_enqueue_xprt method We have never been able to track down and address the underlying cause of the performance issues with workqueue-based service support. svo_enqueue_xprt is called multiple times per RPC, so it adds instruction path length, but always ends up at the same function: svc_xprt_do_enqueue(). We do not anticipate needing this flexibility for dynamic nfsd thread management support. As a micro-optimization, remove .svo_enqueue_xprt because Spectre/Meltdown makes virtual function calls more costly. This change essentially reverts commit b9e13cdfac70 ("nfsd/sunrpc: turn enqueueing a svc_xprt into a svc_serv operation"). Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 --- include/linux/sunrpc/svc_xprt.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f35c22b3355f..6ef9c1cafd0b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -61,9 +61,6 @@ struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); - /* queue up a transport for servicing */ - void (*svo_enqueue_xprt)(struct svc_xprt *); - /* optional module to count when adding threads. * Thread function must call module_put_and_kthread_exit() to exit. 
*/ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 571f605bc91e..a3ba027fb4ba 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -131,7 +131,6 @@ int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int, const struct cred *); void svc_xprt_received(struct svc_xprt *xprt); -void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -- cgit v1.2.3 From 87cdd8641c8a1ec6afd2468265e20840a57fd888 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 13:49:29 -0500 Subject: SUNRPC: Remove svo_shutdown method Clean up. Neil observed that "any code that calls svc_shutdown_net() knows what the shutdown function should be, and so can call it directly." Signed-off-by: Chuck Lever Reviewed-by: NeilBrown --- include/linux/sunrpc/svc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6ef9c1cafd0b..63794d772eb3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -55,9 +55,6 @@ struct svc_pool { struct svc_serv; struct svc_serv_ops { - /* Callback to use when last thread exits. */ - void (*svo_shutdown)(struct svc_serv *, struct net *); - /* function for service threads to run */ int (*svo_function)(void *); -- cgit v1.2.3 From 352ad31448fecc78a2e9b78da64eea5d63b8d0ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 26 Jan 2022 11:42:08 -0500 Subject: SUNRPC: Rename svc_create_xprt() Clean up: Use the "svc_xprt_" function naming convention as is used for other external APIs. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a3ba027fb4ba..a7f6f17c3dc5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -127,9 +127,10 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int, - const struct cred *); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred *cred); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From 4355d767a21b9445958fc11bce9a9701f76529d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 31 Jan 2022 13:34:29 -0500 Subject: SUNRPC: Rename svc_close_xprt() Clean up: Use the "svc_xprt_" function naming convention as is used for other external APIs. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a7f6f17c3dc5..bf7d029fb48c 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -135,7 +135,7 @@ void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, -- cgit v1.2.3 From c7d7ec8f043e53ad16e30f5ebb8b9df415ec0f2b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 26 Jan 2022 11:30:55 -0500 Subject: SUNRPC: Remove svc_shutdown_net() Clean up: svc_shutdown_net() now does nothing but call svc_close_net(). Replace all external call sites. svc_close_net() is renamed to be the inverse of svc_xprt_create(). Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 - include/linux/sunrpc/svc_xprt.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 63794d772eb3..5603158b2aa7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -508,7 +508,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index bf7d029fb48c..42e113742429 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -131,6 +131,7 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From f49169c97fceb21ad6a0aaf671c50b0f520f15a5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Feb 2022 12:31:09 -0500 Subject: NFSD: Remove svc_serv_ops::svo_module struct svc_serv_ops is about to be removed. Neil Brown says: > I suspect svo_module can go as well - I don't think the thread is > ever the thing that primarily keeps a module active. A random sample of kthread_create() callers shows sunrpc is the only one that manages module reference count in this way. Suggested-by: Neil Brown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5603158b2aa7..dfc9283f412f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -57,11 +57,6 @@ struct svc_serv; struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); - - /* optional module to count when adding threads. 
- * Thread function must call module_put_and_kthread_exit() to exit. - */ - struct module *svo_module; }; /* -- cgit v1.2.3 From 37902c6313090235c847af89c5515591261ee338 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Feb 2022 12:16:27 -0500 Subject: NFSD: Move svc_serv_ops::svo_function into struct svc_serv Hoist svo_function back into svc_serv and remove struct svc_serv_ops, since the struct is now devoid of fields. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dfc9283f412f..a5dda4987e8b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -52,13 +52,6 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; -struct svc_serv; - -struct svc_serv_ops { - /* function for service threads to run */ - int (*svo_function)(void *); -}; - /* * RPC service. * @@ -91,7 +84,8 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - const struct svc_serv_ops *sv_ops; /* server operations */ + int (*sv_threadfn)(void *data); + #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -492,7 +486,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, @@ -500,7 +494,7 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); int svc_process(struct svc_rqst *); -- cgit v1.2.3 From 74aaf96feaca80285912cc6f19575b3e97177918 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Feb 2022 13:10:52 -0500 Subject: SUNRPC: Teach server to recognize RPC_AUTH_TLS Initial support for the RPC_AUTH_TLS authentication flavor enables NFSD to eventually accept an RPC_AUTH_TLS probe from clients. This patch simply prevents NFSD from rejecting these probes completely. In the meantime, graft this support in now so that RPC_AUTH_TLS support keeps up with generic code and API changes in the RPC server. Down the road, server-side transport implementations will populate xpo_start_tls when they can support RPC-with-TLS. For example, TCP will eventually populate it, but RDMA won't. 
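A hedged sketch of what populating the new hook might eventually look like for a TCP-class transport; no in-tree transport does this yet, and the names below are purely illustrative:

static void example_tcp_start_tls(struct svc_xprt *xprt)
{
	/* hand the underlying socket over to kernel TLS here */
}

static const struct svc_xprt_ops example_tcp_ops = {
	/* ... the existing xpo_* callbacks ... */
	.xpo_start_tls	= example_tcp_start_tls,
};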
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e113742429..20068ccfd0cc 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -28,6 +28,7 @@ struct svc_xprt_ops { void (*xpo_free)(struct svc_xprt *); void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); + void (*xpo_start_tls)(struct svc_xprt *); }; struct svc_xprt_class { -- cgit v1.2.3 From 797bd4d41c8b41afc10fad39146ac9558cb39e94 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 24 Feb 2022 10:55:54 +0100 Subject: tty: serial: define UART_LCR_WLEN() macro Define a generic UART_LCR_WLEN() macro with a size argument. It can be used to encode byte size into an LCR value. Therefore we can use it to simplify the drivers using tty_get_char_size() in the next patches. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220224095558.30929-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 0916107c77f9..0b8b7d7c8f33 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -12,6 +12,8 @@ #include #include +/* Helper for dealing with UART_LCR_WLEN* defines */ +#define UART_LCR_WLEN(x) ((x) - 5) /* * Counters of the input lines (CTS, DSR, RI, CD) interrupts -- cgit v1.2.3 From 17a8f31bba7bac8cce4bd12bab50697da96e7710 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Feb 2022 04:18:05 +0100 Subject: netfilter: egress: silence egress hook lockdep splats Netfilter assumes its called with rcu_read_lock held, but in egress hook case it may be called with BH readlock. This triggers lockdep splat. In order to avoid to change all rcu_dereference() to rcu_dereference_check(..., rcu_read_lock_bh_held()), wrap nf_hook_slow with read lock/unlock pair. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index b4dd96e4dc8d..e6487a691136 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -101,7 +101,11 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, nf_hook_state_init(&state, NF_NETDEV_EGRESS, NFPROTO_NETDEV, dev, NULL, NULL, dev_net(dev), NULL); + + /* nf assumes rcu_read_lock, not just read_lock_bh */ + rcu_read_lock(); ret = nf_hook_slow(skb, &state, e, 0); + rcu_read_unlock(); if (ret == 1) { return skb; -- cgit v1.2.3 From dcfd5192563909219f6304b4e3e10db071158eef Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 15 Feb 2022 13:46:51 -0500 Subject: soc: mediatek: mtk-infracfg: Disable ACP on MT8192 MT8192 contains an experimental Accelerator Coherency Port implementation, which does not work correctly but was unintentionally enabled by default. For correct operation of the GPU, we must set a chicken bit disabling ACP on MT8192. Adapted from the following downstream change to the out-of-tree, legacy Mali GPU driver: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2781271/5 Note this change is required for both Panfrost and the legacy kernel driver. 
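A hedged sketch of how the consumer side (which lives outside this 'include'-only view) might apply the new define, assuming the infracfg syscon is available as a regmap:

#include <linux/regmap.h>
#include <linux/soc/mediatek/infracfg.h>

static int mt8192_disable_mfg2acp(struct regmap *infracfg)
{
	return regmap_set_bits(infracfg, MT8192_INFRA_CTRL,
			       MT8192_INFRA_CTRL_DISABLE_MFG2ACP);
}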
Co-developed-by: Robin Murphy Signed-off-by: Robin Murphy Signed-off-by: Alyssa Rosenzweig Cc: Nick Fan Cc: Nicolas Boichat Cc: Chen-Yu Tsai Cc: Stephen Boyd Cc: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220215184651.12168-1-alyssa.rosenzweig@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 8a1c2040a28e..50804ac748bd 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -277,6 +277,9 @@ #define INFRA_TOPAXI_PROTECTEN_SET 0x0260 #define INFRA_TOPAXI_PROTECTEN_CLR 0x0264 +#define MT8192_INFRA_CTRL 0x290 +#define MT8192_INFRA_CTRL_DISABLE_MFG2ACP BIT(9) + #define REG_INFRA_MISC 0xf00 #define F_DDR_4GB_SUPPORT_EN BIT(13) -- cgit v1.2.3 From cb66b9ba5cdacab5592bce31a4083fc4ac172ea5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 28 Feb 2022 22:58:28 -0800 Subject: Input: add input_copy_abs() function Add a new helper function to copy absinfo from one input_dev to another input_dev. This is useful to e.g. setup a pen/stylus input-device for combined touchscreen/pen hardware where the pen uses the same coordinates as the touchscreen. Suggested-by: Dmitry Torokhov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220131143539.109142-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 0354b298d874..49790c1bd2c4 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -475,6 +475,8 @@ static inline void input_set_events_per_packet(struct input_dev *dev, int n_even void input_alloc_absinfo(struct input_dev *dev); void input_set_abs_params(struct input_dev *dev, unsigned int axis, int min, int max, int fuzz, int flat); +void input_copy_abs(struct input_dev *dst, unsigned int dst_axis, + const struct input_dev *src, unsigned int src_axis); #define INPUT_GENERATE_ABS_ACCESSORS(_suffix, _item) \ static inline int input_abs_get_##_suffix(struct input_dev *dev, \ -- cgit v1.2.3 From 7b8135f4df98b155b23754b6065c157861e268f1 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Mar 2022 05:04:34 +0000 Subject: rtnetlink: add new rtm tunnel api for tunnel id filtering This patch adds new rtm tunnel msg and api for tunnel id filtering in dst_metadata devices. First dst_metadata device to use the api is vxlan driver with AF_BRIDGE family. This and later changes add ability in vxlan driver to do tunnel id filtering (or vni filtering) on dst_metadata devices. This is similar to vlan api in the vlan filtering bridge. this patch includes selinux nlmsg_route_perms support for RTM_*TUNNEL api from Benjamin Poirier. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/rtnetlink.h | 9 +++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index be09d2ad4b5d..3dfc9ff2ec9b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -713,7 +713,32 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 reserved1; + __u16 reserved2; + __u32 ifindex; +}; + /* VXLAN section */ +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -745,6 +770,7 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 93d934cc4613..0970cb4b1b88 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -185,6 +185,13 @@ enum { RTM_GETNEXTHOPBUCKET, #define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -756,6 +763,8 @@ enum rtnetlink_groups { #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN RTNLGRP_MCTP_IFADDR, #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From f9c4bb0b245cee35ef66f75bf409c9573d934cf9 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Mar 2022 05:04:36 +0000 Subject: vxlan: vni filtering support on collect metadata device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds vnifiltering support to collect metadata device. Motivation: You can only use a single vxlan collect metadata device for a given vxlan udp port in the system today. The vxlan collect metadata device terminates all received vxlan packets. As shown in the below diagram, there are use-cases where you need to support multiple such vxlan devices in independent bridge domains. Each vxlan device must terminate the vni's it is configured for. Example usecase: In a service provider network a service provider typically supports multiple bridge domains with overlapping vlans. One bridge domain per customer. Vlans in each bridge domain are mapped to globally unique vxlan ranges assigned to each customer. vnifiltering support in collect metadata devices terminates only configured vnis. This is similar to vlan filtering in bridge driver. The vni filtering capability is provided by a new flag on collect metadata device. 
In the below pic: - customer1 is mapped to br1 bridge domain - customer2 is mapped to br2 bridge domain - customer1 vlan 10-11 is mapped to vni 1001-1002 - customer2 vlan 10-11 is mapped to vni 2001-2002 - br1 and br2 are vlan filtering bridges - vxlan1 and vxlan2 are collect metadata devices with vnifiltering enabled ┌──────────────────────────────────────────────────────────────────┐ │ switch │ │ │ │ ┌───────────┐ ┌───────────┐ │ │ │ │ │ │ │ │ │ br1 │ │ br2 │ │ │ └┬─────────┬┘ └──┬───────┬┘ │ │ vlans│ │ vlans │ │ │ │ 10,11│ │ 10,11│ │ │ │ │ vlanvnimap: │ vlanvnimap: │ │ │ 10-1001,11-1002 │ 10-2001,11-2002 │ │ │ │ │ │ │ │ ┌──────┴┐ ┌──┴─────────┐ ┌───┴────┐ │ │ │ │ swp1 │ │vxlan1 │ │ swp2 │ ┌┴─────────────┐ │ │ │ │ │ vnifilter:│ │ │ │vxlan2 │ │ │ └───┬───┘ │ 1001,1002│ └───┬────┘ │ vnifilter: │ │ │ │ └────────────┘ │ │ 2001,2002 │ │ │ │ │ └──────────────┘ │ │ │ │ │ └───────┼──────────────────────────────────┼───────────────────────┘ │ │ │ │ ┌─────┴───────┐ │ │ customer1 │ ┌─────┴──────┐ │ host/VM │ │customer2 │ └─────────────┘ │ host/VM │ └────────────┘ With this implementation, vxlan dst metadata device can be associated with range of vnis. struct vxlan_vni_node is introduced to represent a configured vni. We start with vni and its associated remote_ip in this structure. This structure can be extended to bring in other per vni attributes if there are usecases for it. A vni inherits an attribute from the base vxlan device if there is no per vni attributes defined. struct vxlan_dev gets a new rhashtable for vnis called vxlan_vni_group. vxlan_vnifilter.c implements the necessary netlink api, notifications and helper functions to process and manage lifecycle of vxlan_vni_node. This patch also adds new helper functions in vxlan_multicast.c to handle per vni remote_ip multicast groups which are part of vxlan_vni_group. Fix build problems: Reported-by: kernel test robot Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/vxlan.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 5a934bebe630..8eb961bb9589 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -232,6 +232,25 @@ struct vxlan_dev_node { struct vxlan_dev *vxlan; }; +struct vxlan_vni_node { + struct rhash_head vnode; + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif + struct list_head vlist; + __be32 vni; + union vxlan_addr remote_ip; /* default remote ip for this vni */ + + struct rcu_head rcu; +}; + +struct vxlan_vni_group { + struct rhashtable vni_hash; + struct list_head vni_list; + u32 num_vnis; +}; + /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ @@ -254,6 +273,8 @@ struct vxlan_dev { struct vxlan_config cfg; + struct vxlan_vni_group __rcu *vnigrp; + struct hlist_head fdb_head[FDB_HASH_SIZE]; }; @@ -274,6 +295,7 @@ struct vxlan_dev { #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 +#define VXLAN_F_VNIFILTER 0x20000 /* Flags that are used in the receive path. 
These flags must match in * order for a socket to be shareable @@ -283,7 +305,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ @@ -292,7 +315,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); -- cgit v1.2.3 From 4095e0e1328a3cd9e3b30174d6cb0edb3824256d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 1 Mar 2022 05:04:38 +0000 Subject: drivers: vxlan: vnifilter: per vni stats Add per-vni statistics for vni filter mode. Counting Rx/Tx bytes/packets/drops/errors at the appropriate places. This patch changes vxlan_vs_find_vni to also return the vxlan_vni_node in cases where the vni belongs to a vni filtering vxlan device Signed-off-by: Nikolay Aleksandrov Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/vxlan.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 8eb961bb9589..bca5b01af247 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -227,6 +227,31 @@ struct vxlan_config { enum ifla_vxlan_df df; }; +enum { + VXLAN_VNI_STATS_RX, + VXLAN_VNI_STATS_RX_DROPS, + VXLAN_VNI_STATS_RX_ERRORS, + VXLAN_VNI_STATS_TX, + VXLAN_VNI_STATS_TX_DROPS, + VXLAN_VNI_STATS_TX_ERRORS, +}; + +struct vxlan_vni_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + u64 tx_errors; +}; + +struct vxlan_vni_stats_pcpu { + struct vxlan_vni_stats stats; + struct u64_stats_sync syncp; +}; + struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; @@ -241,6 +266,7 @@ struct vxlan_vni_node { struct list_head vlist; __be32 vni; union vxlan_addr remote_ip; /* default remote ip for this vni */ + struct vxlan_vni_stats_pcpu __percpu *stats; struct rcu_head rcu; }; -- cgit v1.2.3 From 445b2f36bb4efb81f064e931f28b9ec19f114355 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 1 Mar 2022 05:04:39 +0000 Subject: drivers: vxlan: vnifilter: add support for stats dumping Add support for VXLAN vni filter entries' stats dumping Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3dfc9ff2ec9b..e315e53125f4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -716,18 +716,41 @@ enum ipvlan_mode { /* Tunnel RTM header */ struct tunnel_msg { __u8 family; - __u8 reserved1; + __u8 flags; __u16 reserved2; __u32 ifindex; }; /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + enum { VXLAN_VNIFILTER_ENTRY_UNSPEC, VXLAN_VNIFILTER_ENTRY_START, VXLAN_VNIFILTER_ENTRY_END, VXLAN_VNIFILTER_ENTRY_GROUP, VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, __VXLAN_VNIFILTER_ENTRY_MAX }; #define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) -- cgit v1.2.3 From 50bb467c9e76743fbc8441d29113cdad62dbc4fe Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 18 Feb 2022 09:38:58 +0000 Subject: rfkill: define rfill_soft_blocked() if !RFKILL If CONFIG_RFKILL is not set, the Intel WiFi driver will not build the iw_mvm driver part due to the missing rfill_soft_blocked() call. Adding a inline declaration of rfill_soft_blocked() if CONFIG_RFKILL=n fixes the following error: drivers/net/wireless/intel/iwlwifi/mvm/mvm.h: In function 'iwl_mvm_mei_set_sw_rfkill_state': drivers/net/wireless/intel/iwlwifi/mvm/mvm.h:2215:38: error: implicit declaration of function 'rfkill_soft_blocked'; did you mean 'rfkill_blocked'? [-Werror=implicit-function-declaration] 2215 | mvm->hw_registered ? rfkill_soft_blocked(mvm->hw->wiphy->rfkill) : false; | ^~~~~~~~~~~~~~~~~~~ | rfkill_blocked Signed-off-by: Ben Dooks Reported-by: Neill Whillans Fixes: 5bc9a9dd7535 ("rfkill: allow to get the software rfkill state") Link: https://lore.kernel.org/r/20220218093858.1245677-1-ben.dooks@codethink.co.uk Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c35f3962dc4f..373003ace639 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -308,6 +308,11 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) return false; } +static inline bool rfkill_soft_blocked(struct rfkill *rfkill) +{ + return false; +} + static inline enum rfkill_type rfkill_find_type(const char *name) { return RFKILL_TYPE_ALL; -- cgit v1.2.3 From c3873070247d9e3c7a6b0cf9bf9b45e8018427b1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Feb 2022 06:22:22 +0100 Subject: netfilter: nf_queue: fix possible use-after-free Eric Dumazet says: The sock_hold() side seems suspect, because there is no guarantee that sk_refcnt is not already 0. On failure, we cannot queue the packet and need to indicate an error. The packet will be dropped by the caller. 
v2: split skb prefetch hunk into separate change Fixes: 271b72c7fa82c ("udp: RCU handling for Unicast packets.") Reported-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal --- include/net/netfilter/nf_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9eed51e920e8..980daa6e1e3a 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) -- cgit v1.2.3 From 2f6f66ccd21e854cd3743bc8def68f8b4e7d91fc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Feb 2022 18:22:44 +0000 Subject: KVM: Drop kvm_reload_remote_mmus(), open code request in x86 users Remove the generic kvm_reload_remote_mmus() and open code its functionality into the two x86 callers. x86 is (obviously) the only architecture that uses the hook, and is also the only architecture that uses KVM_REQ_MMU_RELOAD in a way that's consistent with the name. That will change in a future patch, as x86's usage when zapping a single shadow page x86 doesn't actually _need_ to reload all vCPUs' MMUs, only MMUs whose root is being zapped actually need to be reloaded. s390 also uses KVM_REQ_MMU_RELOAD, but for a slightly different purpose. Drop the generic code in anticipation of implementing s390 and x86 arch specific requests, which will allow dropping KVM_REQ_MMU_RELOAD entirely. Opportunistically reword the x86 TDP MMU comment to avoid making references to functions (and requests!) when possible, and to remove the rather ambiguous "this". No functional change intended. Cc: Ben Gardon Signed-off-by: Sean Christopherson Reviewed-by: Ben Gardon Message-Id: <20220225182248.3812651-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f11039944c08..0aeb47cffd43 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1325,7 +1325,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_flush_remote_tlbs(struct kvm *kvm); -void kvm_reload_remote_mmus(struct kvm *kvm); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); -- cgit v1.2.3 From e65a3b46b5b1fab92c3273bcf71e028a4d307400 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Feb 2022 18:22:47 +0000 Subject: KVM: Drop KVM_REQ_MMU_RELOAD and update vcpu-requests.rst documentation Remove the now unused KVM_REQ_MMU_RELOAD, shift KVM_REQ_VM_DEAD into the unoccupied space, and update vcpu-requests.rst, which was missing an entry for KVM_REQ_VM_DEAD. Switching KVM_REQ_VM_DEAD to entry '1' also fixes the stale comment about bits 4-7 being reserved. 
Reviewed-by: Claudio Imbrenda Signed-off-by: Sean Christopherson Reviewed-by: Ben Gardon Message-Id: <20220225182248.3812651-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0aeb47cffd43..9536ffa0473b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -153,10 +153,9 @@ static inline bool is_error_page(struct page *page) * Bits 4-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 -#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 -- cgit v1.2.3 From 462791bbfa350189e309a5a94541f6b63cd874e8 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 1 Mar 2022 17:43:56 +0800 Subject: net/smc: add sysctl interface for SMC This patch add sysctl interface to support container environment for SMC as we talk in the mail list. Link: https://lore.kernel.org/netdev/20220224020253.GF5443@linux.alibaba.com Co-developed-by: Tony Lu Signed-off-by: Tony Lu Signed-off-by: Dust Li Signed-off-by: David S. Miller --- include/net/netns/smc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 47b166684fd8..1682eae50579 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -14,5 +14,8 @@ struct netns_smc { struct smc_stats_rsn *fback_rsn; bool limit_smc_hs; /* constraint on handshake */ +#ifdef CONFIG_SYSCTL + struct ctl_table_header *smc_hdr; +#endif }; #endif -- cgit v1.2.3 From 12bbb0d163a90d81a2677cf7808d364697290207 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 1 Mar 2022 17:43:58 +0800 Subject: net/smc: add sysctl for autocorking This add a new sysctl: net.smc.autocorking_size We can dynamically change the behaviour of autocorking by change the value of autocorking_size. Setting to 0 disables autocorking in SMC Signed-off-by: Dust Li Signed-off-by: David S. Miller --- include/net/netns/smc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 1682eae50579..e5389eeaf8bd 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -17,5 +17,6 @@ struct netns_smc { #ifdef CONFIG_SYSCTL struct ctl_table_header *smc_hdr; #endif + unsigned int sysctl_autocorking_size; }; #endif -- cgit v1.2.3 From e45f1c1d70cae0d7a28ad60f9c6391e210354f0b Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 1 Mar 2022 11:07:35 +0200 Subject: interconnect: Add stubs for the bulk API Add stub functions for the bulk API to allow compile testing. 
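A hedged consumer sketch of the bulk API whose prototypes appear in the hunk below; path names and bandwidth numbers are placeholders. With the stubs added here, code like this now compile-tests even when CONFIG_INTERCONNECT=n:

#include <linux/device.h>
#include <linux/interconnect.h>
#include <linux/kernel.h>

static struct icc_bulk_data example_paths[] = {
	{ .name = "memory", .avg_bw = 100000, .peak_bw = 200000 },
	{ .name = "config", .avg_bw = 0,      .peak_bw = 76800 },
};

static int example_enable_paths(struct device *dev)
{
	int ret;

	ret = of_icc_bulk_get(dev, ARRAY_SIZE(example_paths), example_paths);
	if (ret)
		return ret;

	ret = icc_bulk_set_bw(ARRAY_SIZE(example_paths), example_paths);
	if (!ret)
		ret = icc_bulk_enable(ARRAY_SIZE(example_paths), example_paths);
	if (ret)
		icc_bulk_put(ARRAY_SIZE(example_paths), example_paths);
	return ret;
}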
Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/20220301090735.26599-1-djakov@kernel.org Signed-off-by: Georgi Djakov --- include/linux/interconnect.h | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index f2dd2fc8d3cd..f685777b875e 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -38,13 +38,6 @@ struct icc_bulk_data { u32 peak_bw; }; -int __must_check of_icc_bulk_get(struct device *dev, int num_paths, - struct icc_bulk_data *paths); -void icc_bulk_put(int num_paths, struct icc_bulk_data *paths); -int icc_bulk_set_bw(int num_paths, const struct icc_bulk_data *paths); -int icc_bulk_enable(int num_paths, const struct icc_bulk_data *paths); -void icc_bulk_disable(int num_paths, const struct icc_bulk_data *paths); - #if IS_ENABLED(CONFIG_INTERCONNECT) struct icc_path *icc_get(struct device *dev, const int src_id, @@ -58,6 +51,12 @@ int icc_disable(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); void icc_set_tag(struct icc_path *path, u32 tag); const char *icc_get_name(struct icc_path *path); +int __must_check of_icc_bulk_get(struct device *dev, int num_paths, + struct icc_bulk_data *paths); +void icc_bulk_put(int num_paths, struct icc_bulk_data *paths); +int icc_bulk_set_bw(int num_paths, const struct icc_bulk_data *paths); +int icc_bulk_enable(int num_paths, const struct icc_bulk_data *paths); +void icc_bulk_disable(int num_paths, const struct icc_bulk_data *paths); #else @@ -112,6 +111,29 @@ static inline const char *icc_get_name(struct icc_path *path) return NULL; } +static inline int of_icc_bulk_get(struct device *dev, int num_paths, struct icc_bulk_data *paths) +{ + return 0; +} + +static inline void icc_bulk_put(int num_paths, struct icc_bulk_data *paths) +{ +} + +static inline int icc_bulk_set_bw(int num_paths, const struct icc_bulk_data *paths) +{ + return 0; +} + +static inline int icc_bulk_enable(int num_paths, const struct icc_bulk_data *paths) +{ + return 0; +} + +static inline void icc_bulk_disable(int num_paths, const struct icc_bulk_data *paths) +{ +} + #endif /* CONFIG_INTERCONNECT */ #endif /* __LINUX_INTERCONNECT_H */ -- cgit v1.2.3 From 1c1813a743fe84d0dcf53743baa4edc1f74c44c8 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:15 +0100 Subject: HID: core: statically allocate read buffers This is a preparation patch for rethinking the generic processing of HID reports. We can actually pre-allocate all of our memory instead of dynamically allocating/freeing it whenever we parse a report. 
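[Editorial sketch only: what sizing both value arrays once, at field-registration time, can look like. The allocation site and error handling are assumptions; only the new_value field itself comes from this patch.]

	/* one slot per usage, allocated once instead of per parsed report */
	field->value = kcalloc(usages, sizeof(__s32), GFP_KERNEL);
	field->new_value = kcalloc(usages, sizeof(__s32), GFP_KERNEL);
	if (!field->value || !field->new_value) {
		kfree(field->value);
		kfree(field->new_value);
		return NULL;
	}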
Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 7487b0586fe6..3fbfe0986659 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -476,6 +476,7 @@ struct hid_field { unsigned report_count; /* number of this field in the report */ unsigned report_type; /* (input,output,feature) */ __s32 *value; /* last known value(s) */ + __s32 *new_value; /* newly read value(s) */ __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; -- cgit v1.2.3 From b79c1abae5e19726c5060749e4e7c9e426b045c8 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:17 +0100 Subject: HID: core: split data fetching from processing in hid_input_field() This is a preparatory patch for being able to process the usages out of order. We split the retrieval of the data in a separate function and also split out the processing of the usages depending if the field is an array or a variable. No functional changes from this patch. Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 3fbfe0986659..cf79eb3da465 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -342,7 +342,7 @@ struct hid_item { * HID device quirks. */ -/* +/* * Increase this if you need to configure more HID quirks at module load time */ #define MAX_USBHID_BOOT_QUIRKS 4 @@ -483,6 +483,7 @@ struct hid_field { __s32 physical_maximum; __s32 unit_exponent; unsigned unit; + bool ignored; /* this field is ignored in this event */ struct hid_report *report; /* associated report */ unsigned index; /* index into report->field[] */ /* hidinput data */ -- cgit v1.2.3 From 22f4b026c3ddd4b26c5baa202bd3ee38feaa2e9a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:21 +0100 Subject: HID: compute an ordered list of input fields to process This will be used in a later commit: we build a list of input fields (and usage_index) that is ordered based on a usage priority. Changing the usage priority allows to re-order the processed list, meaning that we can enforce some usages to be process before others. For instance, before processing InRange in the HID tablets, we need to know if we are using the eraser (side or button). Enforcing a higher (lower number) priority for Invert allows to force the input stack to process that field before. 
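[Editorial sketch of the kind of priority-ordered insertion this enables; struct hid_field_entry and field_entry_list appear in the diff below, while the helper itself is hypothetical and not the actual hid-core implementation.]

static void hid_insert_field_entry(struct hid_report *report,
				   struct hid_field_entry *entry)
{
	struct hid_field_entry *cur;
	struct list_head *pos = &report->field_entry_list;

	/* lower priority value == processed earlier */
	list_for_each_entry(cur, &report->field_entry_list, list) {
		if (entry->priority < cur->priority) {
			pos = &cur->list;
			break;
		}
	}
	list_add_tail(&entry->list, pos);
}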
Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index cf79eb3da465..f25020c0d6b8 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -477,6 +477,7 @@ struct hid_field { unsigned report_type; /* (input,output,feature) */ __s32 *value; /* last known value(s) */ __s32 *new_value; /* newly read value(s) */ + __s32 *usages_priorities; /* priority of each usage when reading the report */ __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; @@ -493,13 +494,22 @@ struct hid_field { #define HID_MAX_FIELDS 256 +struct hid_field_entry { + struct list_head list; + struct hid_field *field; + unsigned int index; + __s32 priority; +}; + struct hid_report { struct list_head list; struct list_head hidinput_list; + struct list_head field_entry_list; /* ordered list of input fields */ unsigned int id; /* id of this report */ unsigned int type; /* report type */ unsigned int application; /* application usage for this report */ struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */ + struct hid_field_entry *field_entries; /* allocated memory of input field_entry */ unsigned maxfield; /* maximum valid field index */ unsigned size; /* size of the report (bits) */ struct hid_device *device; /* associated device */ -- cgit v1.2.3 From 048cddfd440583a07530774fe20c7d26d7378155 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:23 +0100 Subject: HID: input: enforce Invert usage to be processed before InRange When a device exposes both Invert and InRange, Invert must be processed before InRange. If we keep the order of the device and we process them out of order, InRange will first set BTN_TOOL_PEN, and then Invert will set BTN_TOOL_RUBBER. Userspace knows how to deal with that situation, but fixing it in the kernel is now easier. Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index f25020c0d6b8..eaad0655b05c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -477,7 +477,9 @@ struct hid_field { unsigned report_type; /* (input,output,feature) */ __s32 *value; /* last known value(s) */ __s32 *new_value; /* newly read value(s) */ - __s32 *usages_priorities; /* priority of each usage when reading the report */ + __s32 *usages_priorities; /* priority of each usage when reading the report + * bits 8-16 are reserved for hid-input usage + */ __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; -- cgit v1.2.3 From 87562fcd134214a68e58d0714b820f2f2da75b1f Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:24 +0100 Subject: HID: input: remove the need for HID_QUIRK_INVERT HID_QUIRK_INVERT is kind of complex to deal with and was bogus. Furthermore, it didn't make sense to use a global per struct hid_device quirk for something dynamic as the current state. Store the current tool information in the report itself, and re-order the processing of the fields to enforce having all the tablet "state" fields before getting to In Range and other input fields. This way, we now have all the information whether a tool is present or not while processing In Range. 
This new behavior enforces that only one tool gets forwarded to userspace at the same time, and that if either eraser or invert is set, we enforce BTN_TOOL_RUBBER. Note that the release of the previous tool now happens in its own EV_SYN report so userspace doesn't get confused by having 2 tools. These changes are tested in the following hid-tools regression tests: https://gitlab.freedesktop.org/libevdev/hid-tools/-/merge_requests/127 Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index eaad0655b05c..feb8df61168f 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -347,7 +347,7 @@ struct hid_item { */ #define MAX_USBHID_BOOT_QUIRKS 4 -#define HID_QUIRK_INVERT BIT(0) +/* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */ #define HID_QUIRK_NOTOUCH BIT(1) #define HID_QUIRK_IGNORE BIT(2) #define HID_QUIRK_NOGET BIT(3) @@ -515,6 +515,10 @@ struct hid_report { unsigned maxfield; /* maximum valid field index */ unsigned size; /* size of the report (bits) */ struct hid_device *device; /* associated device */ + + /* tool related state */ + bool tool_active; /* whether the current tool is active */ + unsigned int tool; /* BTN_TOOL_* */ }; #define HID_MAX_IDS 256 -- cgit v1.2.3 From 5c20000a4756f57c824e3045c978ef19136a676d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Feb 2022 15:32:25 +0100 Subject: HID: input: accommodate priorities for slotted devices Multitouch devices in hybrid mode are reporting multiple times the same collection. We should accommodate for this in our handling of priorities by defining the slots they belong to. Signed-off-by: Benjamin Tissoires Reviewed-by: Ping Cheng Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index feb8df61168f..4363a63b9775 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -492,6 +492,7 @@ struct hid_field { /* hidinput data */ struct hid_input *hidinput; /* associated input structure */ __u16 dpad; /* dpad input code */ + unsigned int slot_idx; /* slot index in a report */ }; #define HID_MAX_FIELDS 256 -- cgit v1.2.3 From dc6e0818bc9a0336d9accf3ea35d146d72aa7a18 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 20 Feb 2022 13:14:25 +0800 Subject: sched/cpuacct: Optimize away RCU read lock Since cpuacct_charge() is called from the scheduler update_curr(), we must already have rq lock held, then the RCU read lock can be optimized away. And do the same thing in it's wrapper cgroup_account_cputime(), but we can't use lockdep_assert_rq_held() there, which defined in kernel/sched/sched.h. 
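[Editorial sketch of the callee-side reasoning; lockdep_assert_rq_held() and task_rq() are the kernel/sched/ internal helpers mentioned above and are cited for illustration only, the body of the function is elided.]

void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	/* update_curr() calls us with the runqueue lock held; holding the
	 * rq lock also implies an RCU read-side critical section, so no
	 * explicit rcu_read_lock()/unlock() pair is needed here. */
	lockdep_assert_rq_held(task_rq(tsk));

	/* ... walk the cpuacct hierarchy and charge @cputime ... */
}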
Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220220051426.5274-2-zhouchengming@bytedance.com --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c151413fda..9a109c6ac0e0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -791,11 +791,9 @@ static inline void cgroup_account_cputime(struct task_struct *task, cpuacct_charge(task, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); - rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, -- cgit v1.2.3 From 3eba0505d03a9c1eb30d40c2330c0880b22d1b3a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 20 Feb 2022 13:14:26 +0800 Subject: sched/cpuacct: Remove redundant RCU read lock The cpuacct_account_field() and it's cgroup v2 wrapper cgroup_account_cputime_field() is only called from cputime in task_group_account_field(), which is already in RCU read-side critical section. So remove these redundant RCU read lock. Suggested-by: Tejun Heo Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220220051426.5274-3-zhouchengming@bytedance.com --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9a109c6ac0e0..1e356c222756 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -804,11 +804,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, cpuacct_account_field(task, index, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); - rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ -- cgit v1.2.3 From fa2c3254d7cfff5f7a916ab928a562d1165f17bb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:19 +0000 Subject: sched/tracing: Don't re-read p->state when emitting sched_switch event As of commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the following sequence becomes possible: p->__state = TASK_INTERRUPTIBLE; __schedule() deactivate_task(p); ttwu() READ !p->on_rq p->__state=TASK_WAKING trace_sched_switch() __trace_sched_switch_state() task_state_index() return 0; TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in the trace event. Prevent this by pushing the value read from __schedule() down the trace event. 
Reported-by: Abhijeet Dharmapurikar Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com --- include/linux/sched.h | 11 ++++++++--- include/trace/events/sched.h | 11 +++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f00132e7ef6e..457c8a058b77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1620,10 +1620,10 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int task_state_index(struct task_struct *tsk) +static inline unsigned int __task_state_index(unsigned int tsk_state, + unsigned int tsk_exit_state) { - unsigned int tsk_state = READ_ONCE(tsk->__state); - unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); @@ -1633,6 +1633,11 @@ static inline unsigned int task_state_index(struct task_struct *tsk) return fls(state); } +static inline unsigned int task_state_index(struct task_struct *tsk) +{ + return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); +} + static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 94640482cfe7..65e786756321 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS -static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) +static inline long __trace_sched_switch_state(bool preempt, + unsigned int prev_state, + struct task_struct *p) { unsigned int state; @@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * * it for left shift operation to get the correct task->state * mapping. */ - state = task_state_index(p); + state = __task_state_index(prev_state, p->exit_state); return state ? 
(1 << (state - 1)) : state; } @@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, + unsigned int prev_state, struct task_struct *prev, struct task_struct *next), - TP_ARGS(preempt, prev, next), + TP_ARGS(preempt, prev_state, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) @@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; - __entry->prev_state = __trace_sched_switch_state(preempt, prev); + __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; -- cgit v1.2.3 From 25795ef6299f07ce3838f3253a9cb34f64efcfae Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:20 +0000 Subject: sched/tracing: Report TASK_RTLOCK_WAIT tasks as TASK_UNINTERRUPTIBLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TASK_RTLOCK_WAIT currently isn't part of TASK_REPORT, thus a task blocking on an rtlock will appear as having a task state == 0, IOW TASK_RUNNING. The actual state is saved in p->saved_state, but reading it after reading p->__state has a few issues: o that could still be TASK_RUNNING in the case of e.g. rt_spin_lock o ttwu_state_match() might have changed that to TASK_RUNNING As pointed out by Eric, adding TASK_RTLOCK_WAIT to TASK_REPORT implies exposing a new state to userspace tools which way not know what to do with them. The only information that needs to be conveyed here is that a task is waiting on an rt_mutex, which matches TASK_UNINTERRUPTIBLE - there's no need for a new state. Reported-by: Uwe Kleine-König Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-3-valentin.schneider@arm.com --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 457c8a058b77..78b606c9ab38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1630,6 +1630,14 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; + /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. + */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); } -- cgit v1.2.3 From cedd3614e5d9c80908099c19f8716714ce0610b1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Feb 2022 11:06:54 +0530 Subject: perf: Add irq and exception return branch types This expands generic branch type classification by adding two more entries there in i.e irq and exception return. Also updates the x86 implementation to process X86_BR_IRET and X86_BR_IRQ records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. 
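[Editorial, tool-side sketch of how the two new values slot into the existing classification; only PERF_BR_ERET and PERF_BR_IRQ come from this patch, the mapping helper is illustrative.]

#include <linux/perf_event.h>

static const char *branch_type_name(unsigned int type)
{
	switch (type) {
	case PERF_BR_SYSCALL:	return "syscall";
	case PERF_BR_SYSRET:	return "syscall return";
	case PERF_BR_ERET:	return "exception return";	/* new */
	case PERF_BR_IRQ:	return "irq";			/* new */
	default:		return "unknown";
	}
}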
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1645681014-3346-1-git-send-email-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..7dc71768749d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -251,6 +251,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; -- cgit v1.2.3 From 6ddf5f165f13ab623d04aee2a473d35818255199 Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Mon, 14 Feb 2022 05:01:01 +0000 Subject: ceph: add getvxattr op Problem: Some directory vxattrs (e.g. ceph.dir.pin.random) are governed by information that isn't necessarily shared with the client. Add support for the new GETVXATTR operation, which allows the client to query the MDS directly for vxattrs. When the client is queried for a vxattr that doesn't have a special handler, have it issue a GETVXATTR to the MDS directly. Solution: Adds new getvxattr op to fetch ceph.dir.pin*, ceph.dir.layout* and ceph.file.layout* vxattrs. If the entire layout for a dir or a file is being set, then it is expected that the layout be set in standard JSON format. Individual field value retrieval is not wrapped in JSON. The JSON format also applies while setting the vxattr if the entire layout is being set in one go. As a temporary measure, setting a vxattr can also be done in the old format. The old format will be deprecated in the future. URL: https://tracker.ceph.com/issues/51062 Signed-off-by: Milind Changire Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7ad6c3d0db7d..66db21ac5f0c 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -328,6 +328,7 @@ enum { CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_LOOKUPNAME = 0x00105, + CEPH_MDS_OP_GETVXATTR = 0x00106, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3 From ab58a5a1c0487b67f7409f39d3c8593d416d4e7f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 15 Feb 2022 20:23:14 +0800 Subject: ceph: move to a dedicated slabcache for ceph_cap_snap There could be huge number of capsnaps around at any given time. On x86_64 the structure is 248 bytes, which will be rounded up to 256 bytes by kzalloc. Move this to a dedicated slabcache to save 8 bytes for each. 
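[Editorial sketch of the dedicated-cache pattern; only ceph_cap_snap_cachep itself is introduced by this patch, the init hook and allocation helper names are assumptions. Allocation is zeroed, per the bracketed note that follows.]

struct kmem_cache *ceph_cap_snap_cachep;

static int __init ceph_caches_init(void)		/* hypothetical hook */
{
	ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, 0);
	return ceph_cap_snap_cachep ? 0 : -ENOMEM;
}

static struct ceph_cap_snap *cap_snap_alloc(void)	/* hypothetical */
{
	return kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS);
}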
[ jlayton: use kmem_cache_zalloc ] Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index edf62eaa6285..00af2c98da75 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -284,6 +284,7 @@ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_snap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; -- cgit v1.2.3 From 1753629ea0f34900467185b7d8b0db11a64f4728 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 23 Feb 2022 09:04:55 +0800 Subject: ceph: remove incorrect and unused CEPH_INO_DOTDOT macro Ceph have removed this macro and the 0x3 will be use for global dummy snaprealm. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 66db21ac5f0c..f14f9bc290e6 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -29,7 +29,6 @@ #define CEPH_INO_ROOT 1 #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ -#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 -- cgit v1.2.3 From 5ed91587e201c77b35a5555c8c082655bb5834fe Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 23 Feb 2022 09:04:56 +0800 Subject: ceph: do not release the global snaprealm until unmounting The global snaprealm would be created and then destroyed immediately every time when updating it. URL: https://tracker.ceph.com/issues/54362 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f14f9bc290e6..86bf82dbd8b8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -28,7 +28,8 @@ #define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 -- cgit v1.2.3 From 2cbfae0f50f7a0f8fc9d552bd856f6cd7b7608b6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Feb 2022 01:56:20 +0200 Subject: ACPI: platform: Constify properties parameter in acpi_create_platform_device() Properties are not and should not be changed in the callee, hence constify properties parameter in acpi_create_platform_device(). Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..9ac545379447 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -691,7 +691,7 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, - struct property_entry *); + const struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) @@ -930,7 +930,7 @@ static inline int acpi_device_modalias(struct device *dev, static inline struct platform_device * acpi_create_platform_device(struct acpi_device *adev, - struct property_entry *properties) + const struct property_entry *properties) { return NULL; } -- cgit v1.2.3 From db6140e5e35a48405e669353bd54042c1d4c3841 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 28 Feb 2022 11:23:49 +0200 Subject: net/sched: act_ct: Fix flow table lookup failure with no originating ifindex After cited commit optimizted hw insertion, flow table entries are populated with ifindex information which was intended to only be used for HW offload. This tuple ifindex is hashed in the flow table key, so it must be filled for lookup to be successful. But tuple ifindex is only relevant for the netfilter flowtables (nft), so it's not filled in act_ct flow table lookup, resulting in lookup failure, and no SW offload and no offload teardown for TCP connection FIN/RST packets. To fix this, add new tc ifindex field to tuple, which will only be used for offloading, not for lookup, as it will not be part of the tuple hash. Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index a3647fadf1cc..bd59e950f4d6 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -96,6 +96,7 @@ enum flow_offload_xmit_type { FLOW_OFFLOAD_XMIT_NEIGH, FLOW_OFFLOAD_XMIT_XFRM, FLOW_OFFLOAD_XMIT_DIRECT, + FLOW_OFFLOAD_XMIT_TC, }; #define NF_FLOW_TABLE_ENCAP_MAX 2 @@ -127,7 +128,7 @@ struct flow_offload_tuple { struct { } __hash; u8 dir:2, - xmit_type:2, + xmit_type:3, encap_num:2, in_vlan_ingress:2; u16 mtu; @@ -142,6 +143,9 @@ struct flow_offload_tuple { u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; } out; + struct { + u32 iifidx; + } tc; }; }; -- cgit v1.2.3 From d65bc29be0ae4ca2368df25dc6f6247aefb57f07 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 13 Feb 2022 22:25:20 +0300 Subject: binfmt: move more stuff undef CONFIG_COREDUMP struct linux_binfmt::core_dump and struct min_coredump::min_coredump are used under CONFIG_COREDUMP only. Shrink those embedded configs a bit. 
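[Editorial sketch of how a format handler adapts, assuming the usual binfmt_elf pattern; with CONFIG_COREDUMP=n the two coredump members simply no longer exist in struct linux_binfmt.]

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};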
Signed-off-by: Alexey Dobriyan Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/YglbIFyN+OtwVyjW@localhost.localdomain --- include/linux/binfmts.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 049cf9421d83..5d651c219c99 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -98,8 +98,10 @@ struct linux_binfmt { struct module *module; int (*load_binary)(struct linux_binprm *); int (*load_shlib)(struct file *); +#ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ +#endif } __randomize_layout; extern void __register_binfmt(struct linux_binfmt *fmt, int insert); -- cgit v1.2.3 From 4f0bfdfd8323e5b461fc1042143a1097dba9fced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Feb 2022 19:34:31 -0800 Subject: ELF: Properly redefine PT_GNU_* in terms of PT_LOOS The PT_GNU_* program header types are actually offsets from PT_LOOS, so redefine them as such, reorder them, and add the missing PT_GNU_RELRO. Cc: Eric Biederman Cc: Peter Collingbourne Cc: Catalin Marinas Cc: Dave Martin Signed-off-by: Kees Cook --- include/uapi/linux/elf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..6438d55529bf 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -35,10 +35,11 @@ typedef __s64 Elf64_Sxword; #define PT_HIOS 0x6fffffff /* OS-specific */ #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff -#define PT_GNU_EH_FRAME 0x6474e550 -#define PT_GNU_PROPERTY 0x6474e553 - +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) #define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + /* * Extended Numbering -- cgit v1.2.3 From dd0ca255f3d27a1bb43d8e9529fb3645f9a341a3 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:28 +0100 Subject: if_ether.h: add PROFINET Ethertype Add the Ethertype for PROFINET protocol. Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index c0c2f3ed5729..4f4ed35a16db 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,6 +86,7 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ -- cgit v1.2.3 From cd73cda742fbe1f33ed7306c7a01aa64f4e6ebd5 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:29 +0100 Subject: if_ether.h: add EtherCAT Ethertype Add the Ethertype for EtherCAT protocol. 
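[Editorial sketch of a receive-path check using the two new constants; the helper itself and any protocol handling around it are assumptions, not part of these patches.]

#include <linux/if_ether.h>
#include <linux/etherdevice.h>

static bool is_industrial_ethertype(const struct sk_buff *skb)
{
	__be16 proto = eth_hdr(skb)->h_proto;

	return proto == htons(ETH_P_PROFINET) ||
	       proto == htons(ETH_P_ETHERCAT);
}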
Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 4f4ed35a16db..1d0bccc3fa54 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -89,6 +89,7 @@ #define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ -- cgit v1.2.3 From ce70fd9a551af7424a7dace2a1ba05a7de8eae27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:47 +0100 Subject: scsi: core: Remove the cmd field from struct scsi_request Now that each scsi_request is backed by a scsi_cmnd, there is no need to indirect the CDB storage. Change all submitters of SCSI passthrough requests to store the CDB information directly in the scsi_cmnd, and while doing so allocate the full 32 bytes that cover all Linux supported SCSI hosts instead of requiring dynamic allocation for > 16 byte CDBs. On 64-bit systems this does not change the size of the scsi_cmnd at all, while on 32-bit systems it slightly increases it for now, but that increase will be made up by the removal of the remaining scsi_request fields. Link: https://lore.kernel.org/r/20220224175552.988286-4-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 7 +------ include/scsi/scsi_eh.h | 4 +--- include/scsi/scsi_request.h | 11 ----------- 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 7a19c8bbaed9..3e432e25645a 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -28,9 +28,6 @@ struct scsi_driver; * supports without specifying a cmd_len by ULD's */ #define MAX_COMMAND_SIZE 16 -#if (MAX_COMMAND_SIZE > BLK_MAX_CDB) -# error MAX_COMMAND_SIZE can not be bigger than BLK_MAX_CDB -#endif struct scsi_data_buffer { struct sg_table table; @@ -100,9 +97,7 @@ struct scsi_cmnd { unsigned short cmd_len; enum dma_data_direction sc_data_direction; - /* These elements define the operation we are about to perform */ - unsigned char *cmnd; - + unsigned char cmnd[32]; /* SCSI CDB */ /* These elements define the operation we ultimately want to perform */ struct scsi_data_buffer sdb; diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 468094254b3c..1ae08e81339f 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -38,10 +38,8 @@ struct scsi_eh_save { unsigned underflow; unsigned char cmd_len; unsigned char prot_op; - unsigned char *cmnd; + unsigned char cmnd[32]; struct scsi_data_buffer sdb; - /* new command support */ - unsigned char eh_cmnd[BLK_MAX_CDB]; struct scatterlist sense_sgl; }; diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index 9129b23e12bc..aeee0611bcbe 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -4,12 +4,7 @@ #include -#define BLK_MAX_CDB 16 - struct scsi_request { - unsigned char __cmd[BLK_MAX_CDB]; - unsigned char *cmd; - unsigned short cmd_len; int result; unsigned int sense_len; unsigned int resid_len; /* residual count */ 
@@ -22,10 +17,4 @@ static inline struct scsi_request *scsi_req(struct request *rq) return blk_mq_rq_to_pdu(rq); } -static inline void scsi_req_free_cmd(struct scsi_request *req) -{ - if (req->cmd != req->__cmd) - kfree(req->cmd); -} - #endif /* _SCSI_SCSI_REQUEST_H */ -- cgit v1.2.3 From 5b794f98074a8b7e8eb77dd43746062940b51160 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:48 +0100 Subject: scsi: core: Remove the sense and sense_len fields from struct scsi_request Just use the sense_buffer field in struct scsi_cmnd for the sense data and move the sense_len field over to struct scsi_cmnd. Link: https://lore.kernel.org/r/20220224175552.988286-5-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 1 + include/scsi/scsi_request.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 3e432e25645a..47add5b32f46 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -112,6 +112,7 @@ struct scsi_cmnd { reconnects. Probably == sector size */ + unsigned sense_len; unsigned char *sense_buffer; /* obtained by REQUEST SENSE when * CHECK CONDITION is received on original diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index aeee0611bcbe..bed1cc49132a 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -6,10 +6,8 @@ struct scsi_request { int result; - unsigned int sense_len; unsigned int resid_len; /* residual count */ int retries; - void *sense; }; static inline struct scsi_request *scsi_req(struct request *rq) -- cgit v1.2.3 From a9a4ea1166d640d1b397f24afc1cd7e96c46cd03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:49 +0100 Subject: scsi: core: Move the resid_len field from struct scsi_request to struct scsi_cmnd Prepare for removing the scsi_request structure by moving the resid_len field to struct scsi_cmnd. Link: https://lore.kernel.org/r/20220224175552.988286-6-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 6 +++--- include/scsi/scsi_request.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 47add5b32f46..5ff0a6e8460c 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -111,7 +111,7 @@ struct scsi_cmnd { (ie, between disconnect / reconnects. 
Probably == sector size */ - + unsigned resid_len; /* residual count */ unsigned sense_len; unsigned char *sense_buffer; /* obtained by REQUEST SENSE when @@ -200,12 +200,12 @@ static inline unsigned scsi_bufflen(struct scsi_cmnd *cmd) static inline void scsi_set_resid(struct scsi_cmnd *cmd, unsigned int resid) { - cmd->req.resid_len = resid; + cmd->resid_len = resid; } static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd) { - return cmd->req.resid_len; + return cmd->resid_len; } #define scsi_for_each_sg(cmd, sg, nseg, __i) \ diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index bed1cc49132a..74be75336a54 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -6,7 +6,6 @@ struct scsi_request { int result; - unsigned int resid_len; /* residual count */ int retries; }; -- cgit v1.2.3 From dbb4c84d87af7416bb7e35f8e6dd8d87d5f221d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:50 +0100 Subject: scsi: core: Move the result field from struct scsi_request to struct scsi_cmnd Prepare for removing the scsi_request structure by moving the result field to struct scsi_cmnd. Link: https://lore.kernel.org/r/20220224175552.988286-7-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_request.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index 74be75336a54..929c7bd5c72f 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -5,7 +5,6 @@ #include struct scsi_request { - int result; int retries; }; -- cgit v1.2.3 From 6aded12b10e0c9536ee2c8ee33a1f7ed52f9cb34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:51 +0100 Subject: scsi: core: Remove struct scsi_request Let submitters initialize the scmd->allowed field directly instead of indirecting through struct scsi_request and remove the now superfluous structure. Link: https://lore.kernel.org/r/20220224175552.988286-8-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 1 - include/scsi/scsi_request.h | 9 --------- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 5ff0a6e8460c..4b33ca6a7c7d 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -68,7 +68,6 @@ enum scsi_cmnd_submitter { } __packed; struct scsi_cmnd { - struct scsi_request req; struct scsi_device *device; struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */ struct delayed_work abort_work; diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index 929c7bd5c72f..6d424d3e8d02 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -4,13 +4,4 @@ #include -struct scsi_request { - int retries; -}; - -static inline struct scsi_request *scsi_req(struct request *rq) -{ - return blk_mq_rq_to_pdu(rq); -} - #endif /* _SCSI_SCSI_REQUEST_H */ -- cgit v1.2.3 From 26440303310591e29121964ede0048583cb3126d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 18:55:52 +0100 Subject: scsi: core: Remove This header is empty now except for an include of , so remove it. 
Link: https://lore.kernel.org/r/20220224175552.988286-9-hch@lst.de Reviewed-by: Bart Van Assche Reviewed-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/bsg-lib.h | 1 - include/scsi/scsi_cmnd.h | 1 - include/scsi/scsi_request.h | 7 ------- 3 files changed, 9 deletions(-) delete mode 100644 include/scsi/scsi_request.h (limited to 'include') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6b211323a489..9e97ced2896d 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -10,7 +10,6 @@ #define _BLK_BSG_ #include -#include struct bsg_job; struct request; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 4b33ca6a7c7d..76c5eaeeb3b5 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -10,7 +10,6 @@ #include #include #include -#include struct Scsi_Host; struct scsi_driver; diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h deleted file mode 100644 index 6d424d3e8d02..000000000000 --- a/include/scsi/scsi_request.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SCSI_SCSI_REQUEST_H -#define _SCSI_SCSI_REQUEST_H - -#include - -#endif /* _SCSI_SCSI_REQUEST_H */ -- cgit v1.2.3 From d8ec5d67b8bb6593223f24c04e1d530d86748250 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 26 Feb 2022 17:04:32 -0600 Subject: scsi: iscsi: Remove iscsi_scan_finished() qla4xxx does not use iscsi_scan_finished() anymore so remove it. Link: https://lore.kernel.org/r/20220226230435.38733-4-michael.christie@oracle.com Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_iscsi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index c5d7810fd792..90b55db46d7c 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -278,7 +278,6 @@ struct iscsi_cls_session { iscsi_dev_to_session(_stgt->dev.parent) struct iscsi_cls_host { - atomic_t nr_scans; struct mutex mutex; struct request_queue *bsg_q; uint32_t port_speed; @@ -448,7 +447,6 @@ extern void iscsi_get_conn(struct iscsi_cls_conn *conn); extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); extern void iscsi_unblock_session(struct iscsi_cls_session *session); extern void iscsi_block_session(struct iscsi_cls_session *session); -extern int iscsi_scan_finished(struct Scsi_Host *shost, unsigned long time); extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size); extern void iscsi_destroy_endpoint(struct iscsi_endpoint *ep); extern struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle); -- cgit v1.2.3 From 5842ea3668310466dcf645a23b6cd5012ce2eadf Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 26 Feb 2022 17:04:33 -0600 Subject: scsi: iscsi: ql4xxx: Use per-session workqueue for unbinding We currently allocate a workqueue per host and only use it for removing the target. For the session per host case we could be using this workqueue to be able to do recoveries (block, unblock, timeout handling) in parallel. To also allow offload drivers to do their session recoveries in parallel, this drops the per host workqueue and replaces it with a per session one. Link: https://lore.kernel.org/r/20220226230435.38733-5-michael.christie@oracle.com Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_transport_iscsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 90b55db46d7c..7a0d24d3b916 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -251,6 +251,8 @@ struct iscsi_cls_session { bool recovery_tmo_sysfs_override; struct delayed_work recovery_work; + struct workqueue_struct *workq; + unsigned int target_id; bool ida_used; -- cgit v1.2.3 From 69af1c9577aae2149f79be6f485609250fdfb0ad Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 26 Feb 2022 17:04:35 -0600 Subject: scsi: iscsi: Drop temp workq_name When the workqueue code was created it didn't allow variable args so we have been using a temp buffer. Drop that. Link: https://lore.kernel.org/r/20220226230435.38733-7-michael.christie@oracle.com Reviewed-by: Chris Leech Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- include/scsi/libiscsi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index cb805ed9cbf1..e76c94697c1b 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -383,7 +383,6 @@ struct iscsi_host { int state; struct workqueue_struct *workq; - char workq_name[20]; }; /* -- cgit v1.2.3 From af4edb1d50c6d1044cb34bc43621411b7ba2cffe Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 28 Feb 2022 13:36:51 +0200 Subject: scsi: core: sd: Add silence_suspend flag to suppress some PM messages Kernel messages produced during runtime PM can cause a never-ending cycle because user space utilities (e.g. journald or rsyslog) write the messages back to storage, causing runtime resume, more messages, and so on. Messages that tell of things that are expected to happen are arguably unnecessary, so add a flag to suppress them. This flag is used by the UFS driver. Link: https://lore.kernel.org/r/20220228113652.970857-2-adrian.hunter@intel.com Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 647c53b26105..57e3e239a1fc 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -206,6 +206,7 @@ struct scsi_device { unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ + unsigned silence_suspend:1; /* Do not print runtime PM related messages */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ -- cgit v1.2.3 From a6264056b39ee0c478e1d73bfc40f61a8cf3673f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Mar 2022 13:48:56 -0600 Subject: ASoC: soc-acpi: remove sof_fw_filename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've been using a default firmware name for each PCI/ACPI/OF platform for a while. The machine-specific sof_fw_filename is in practice not different from the default, and newer devices don't set this field, so let's remove the redundant definitions. When OEMs modify the base firmware, they can keep the same firmware name but store the file in a separate directory. 
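[Editorial sketch of a machine table entry after the cleanup, with only the topology name left per machine; the codec ID, driver name and file name below are illustrative assumptions.]

static struct snd_soc_acpi_mach example_machines[] = {
	{
		.id = "10EC5682",			/* illustrative */
		.drv_name = "example_rt5682",		/* illustrative */
		.sof_tplg_filename = "sof-example-rt5682.tplg",
		/* no per-machine .sof_fw_filename: the platform default
		 * firmware name is used instead */
	},
	{},
};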
Reviewed-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220301194903.60859-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index ac0893df9c76..fdb536d699ff 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -142,7 +142,6 @@ struct snd_soc_acpi_link_adr { * audio codecs whose presence if checked with ACPI * @pdata: intended for platform data or machine specific-ops. This structure * is not constant since this field may be updated at run-time - * @sof_fw_filename: Sound Open Firmware file name, if enabled * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled */ /* Descriptor for SST ASoC machine driver */ @@ -158,7 +157,6 @@ struct snd_soc_acpi_mach { const void *quirk_data; void *pdata; struct snd_soc_acpi_mach_params mach_params; - const char *sof_fw_filename; const char *sof_tplg_filename; }; -- cgit v1.2.3 From 728dd0ab37421396927749fc8cec9c2009c526c8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2022 08:59:33 -0500 Subject: NFS: Don't re-read the entire page cache to find the next cookie If the page cache entry that was last read gets invalidated for some reason, then make sure we can re-create it on the next call to readdir. This, combined with the cache page validation, allows us to reuse the cached value of page-index on successive calls to nfs_readdir. Credit is due to Benjamin Coddington for showing that the concept works, and that it allows for improved cache sharing between processes even in the case where pages are lost due to LRU or active invalidation. Suggested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6e10725887d1..1c533f2c1f36 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -105,6 +105,7 @@ struct nfs_open_dir_context { __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; __u64 dup_cookie; + __u64 last_cookie; pgoff_t page_index; signed char duped; bool eof; -- cgit v1.2.3 From 580f236737d13ee25d5b0b1d124f50014fe6833b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Feb 2022 13:37:00 -0500 Subject: NFS: Adjust the amount of readahead performed by NFS readdir The current NFS readdir code will always try to maximise the amount of readahead it performs on the assumption that we can cache anything that isn't immediately read by the process. There are several cases where this assumption breaks down, including when the 'ls -l' heuristic kicks in to try to force use of readdirplus as a batch replacement for lookup/getattr. This patch therefore tries to tone down the amount of readahead we perform, and adjust it to try to match the amount of data being requested by user space. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1c533f2c1f36..691a27936849 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -107,6 +107,7 @@ struct nfs_open_dir_context { __u64 dup_cookie; __u64 last_cookie; pgoff_t page_index; + unsigned int dtsize; signed char duped; bool eof; }; -- cgit v1.2.3 From 230bc98f7a2a49eb472d184bdec91fd3096384b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Feb 2022 11:08:24 -0500 Subject: NFS: Improve heuristic for readdirplus The heuristic for readdirplus is designed to try to detect 'ls -l' and similar patterns. It does so by looking for cache hit/miss patterns in both the attribute cache and in the dcache of the files in a given directory, and then sets a flag for the readdirplus code to interpret. The problem with this approach is that a single attribute or dcache miss can cause the NFS code to force a refresh of the attributes for the entire set of files contained in the directory. To be able to make a more nuanced decision, let's sample the number of hits and misses in the set of open directory descriptors. That allows us to set thresholds at which we start preferring READDIRPLUS over regular READDIR, or at which we start to force a re-read of the remaining readdir cache using READDIRPLUS. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 691a27936849..20a4cf0acad2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -101,6 +101,8 @@ struct nfs_open_context { struct nfs_open_dir_context { struct list_head list; + atomic_t cache_hits; + atomic_t cache_misses; unsigned long attr_gencount; __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; @@ -110,6 +112,7 @@ struct nfs_open_dir_context { unsigned int dtsize; signed char duped; bool eof; + struct rcu_head rcu_head; }; /* @@ -274,13 +277,11 @@ struct nfs4_copy_state { /* * Bit offsets in flags field */ -#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ #define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if removed while open */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ -#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ -- cgit v1.2.3 From f648022faa68ef76058aa121d1aa3a967d59cae8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Feb 2022 11:31:51 -0500 Subject: NFS: Convert readdir page cache to use a cookie based index Instead of using a linear index to address the pages, use the cookie of the first entry, since that is what we use to match the page anyway. This allows us to avoid re-reading the entire cache on a seekdir() type of operation. The latter is very common when re-exporting NFS, and is a major performance drain. The change does affect our duplicate cookie detection, since we can no longer rely on the page index as a linear offset for detecting whether we looped backwards. 
However since we no longer do a linear search through all the pages on each call to nfs_readdir(), this is less of a concern than it was previously. The other downside is that invalidate_mapping_pages() no longer can use the page index to avoid clearing pages that have been read. A subsequent patch will restore the functionality this provides to the 'ls -l' heuristic. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 20a4cf0acad2..42aad886d3c0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -106,11 +106,9 @@ struct nfs_open_dir_context { unsigned long attr_gencount; __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; - __u64 dup_cookie; __u64 last_cookie; pgoff_t page_index; unsigned int dtsize; - signed char duped; bool eof; struct rcu_head rcu_head; }; -- cgit v1.2.3 From b0365ccb0712efacf99936e94e92eb7ae63de4d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Feb 2022 13:29:59 -0500 Subject: NFS: Fix up forced readdirplus Avoid clearing the entire readdir page cache if we're just doing forced readdirplus for the 'ls -l' heuristic. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 42aad886d3c0..3893386ceaed 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -109,6 +109,7 @@ struct nfs_open_dir_context { __u64 last_cookie; pgoff_t page_index; unsigned int dtsize; + bool force_clear; bool eof; struct rcu_head rcu_head; }; -- cgit v1.2.3 From 0adf85b445c7fbc5d2df1f8c1bc54d62c4340237 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Feb 2022 12:46:24 -0500 Subject: NFS: Optimise away the previous cookie field Replace the 'previous cookie' field in struct nfs_entry with the array->last_cookie. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 728cb0c1f0b6..82f7c2730b9a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -745,8 +745,7 @@ struct nfs_auth_info { */ struct nfs_entry { __u64 ino; - __u64 cookie, - prev_cookie; + __u64 cookie; const char * name; unsigned int len; int eof; -- cgit v1.2.3 From 3c36fe9302d153283a0cea9e4e25a9e2143ac232 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Feb 2022 20:51:42 +0100 Subject: ACPI: bus: Introduce acpi_bus_for_each_dev() In order to avoid exposing acpi_bus_type to modules, introduce an acpi_bus_for_each_dev() helper for iterating over all ACPI device objects and make typec_link_ports() use it instead of the raw bus_for_each_dev() along with acpi_bus_type. Having done that, drop the acpi_bus_type export. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus --- include/acpi/acpi_bus.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ca88c4706f2b..3f7f01f03869 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -480,6 +480,8 @@ void acpi_initialize_hp_context(struct acpi_device *adev, /* acpi_device.dev.bus == &acpi_bus_type */ extern struct bus_type acpi_bus_type; +int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); + /* * Events * ------ -- cgit v1.2.3 From 1038fd78a1b8269b642ce09600242682186a78e2 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:47 +0100 Subject: crypto: kpp - provide support for KPP template instances The upcoming support for the RFC 7919 ffdhe group parameters will be made available in the form of templates like "ffdhe2048(dh)", "ffdhe3072(dh)" and so on. Template instantiations thereof would wrap the inner "dh" kpp_alg and also provide kpp_alg services to the outside again. Furthermore, it might be perhaps be desirable to provide KDF templates in the future, which would similarly wrap an inner kpp_alg and present themselves to the outside as another kpp_alg, transforming the shared secret on its way out. Introduce the bits needed for supporting KPP template instances. Everything related to inner kpp_alg spawns potentially being held by such template instances will be deferred to a subsequent patch in order to facilitate review. Define struct struct kpp_instance in close analogy to the already existing skcipher_instance, shash_instance and alike, but wrapping a struct kpp_alg. Implement the new kpp_register_instance() template instance registration primitive. Provide some helper functions for - going back and forth between a generic struct crypto_instance and the new struct kpp_instance, - obtaining the instantiating kpp_instance from a crypto_kpp transform and - for accessing a given kpp_instance's implementation specific context data. Annotate everything with proper kernel-doc comments, even though include/crypto/internal/kpp.h is not considered for the generated docs. Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/crypto/internal/kpp.h | 83 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index 659b642efada..c4d5a970fe9d 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -10,6 +10,24 @@ #include #include +/** + * struct kpp_instance - KPP template instance + * @free: Callback getting invoked upon instance destruction. Must be set. + * @s: Internal. Generic crypto core instance state properly layout + * to alias with @alg as needed. + * @alg: The &struct kpp_alg implementation provided by the instance. + */ +struct kpp_instance { + void (*free)(struct kpp_instance *inst); + union { + struct { + char head[offsetof(struct kpp_alg, base)]; + struct crypto_instance base; + } s; + struct kpp_alg alg; + }; +}; + /* * Transform internal helpers. */ @@ -33,6 +51,62 @@ static inline const char *kpp_alg_name(struct crypto_kpp *tfm) return crypto_kpp_tfm(tfm)->__crt_alg->cra_name; } +/* + * Template instance internal helpers. + */ +/** + * kpp_crypto_instance() - Cast a &struct kpp_instance to the corresponding + * generic &struct crypto_instance. + * @inst: Pointer to the &struct kpp_instance to be cast. 
+ * Return: A pointer to the &struct crypto_instance embedded in @inst. + */ +static inline struct crypto_instance *kpp_crypto_instance( + struct kpp_instance *inst) +{ + return &inst->s.base; +} + +/** + * kpp_instance() - Cast a generic &struct crypto_instance to the corresponding + * &struct kpp_instance. + * @inst: Pointer to the &struct crypto_instance to be cast. + * Return: A pointer to the &struct kpp_instance @inst is embedded in. + */ +static inline struct kpp_instance *kpp_instance(struct crypto_instance *inst) +{ + return container_of(inst, struct kpp_instance, s.base); +} + +/** + * kpp_alg_instance() - Get the &struct kpp_instance a given KPP transform has + * been instantiated from. + * @kpp: The KPP transform instantiated from some &struct kpp_instance. + * Return: The &struct kpp_instance associated with @kpp. + */ +static inline struct kpp_instance *kpp_alg_instance(struct crypto_kpp *kpp) +{ + return kpp_instance(crypto_tfm_alg_instance(&kpp->base)); +} + +/** + * kpp_instance_ctx() - Get a pointer to a &struct kpp_instance's implementation + * specific context data. + * @inst: The &struct kpp_instance whose context data to access. + * + * A KPP template implementation may allocate extra memory beyond the + * end of a &struct kpp_instance instantiated from &crypto_template.create(). + * This function provides a means to obtain a pointer to this area. + * + * Return: A pointer to the implementation specific context data. + */ +static inline void *kpp_instance_ctx(struct kpp_instance *inst) +{ + return crypto_instance_ctx(kpp_crypto_instance(inst)); +} + +/* + * KPP algorithm (un)registration functions. + */ /** * crypto_register_kpp() -- Register key-agreement protocol primitives algorithm * @@ -56,4 +130,13 @@ int crypto_register_kpp(struct kpp_alg *alg); */ void crypto_unregister_kpp(struct kpp_alg *alg); +/** + * kpp_register_instance() - Register a KPP template instance. + * @tmpl: The instantiating template. + * @inst: The KPP template instance to be registered. + * Return: %0 on success, negative error code otherwise. + */ +int kpp_register_instance(struct crypto_template *tmpl, + struct kpp_instance *inst); + #endif -- cgit v1.2.3 From 46ed5269bf7dac752f78fc5f8dc5efb52b483fe7 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:48 +0100 Subject: crypto: kpp - provide support for KPP spawns The upcoming support for the RFC 7919 ffdhe group parameters will be made available in the form of templates like "ffdhe2048(dh)", "ffdhe3072(dh)" and so on. Template instantiations thereof would wrap the inner "dh" kpp_alg and also provide kpp_alg services to the outside again. The primitves needed for providing kpp_alg services from template instances have been introduced with the previous patch. Continue this work now and implement everything needed for enabling template instances to make use of inner KPP algorithms like "dh". More specifically, define a struct crypto_kpp_spawn in close analogy to crypto_skcipher_spawn, crypto_shash_spawn and alike. Implement a crypto_grab_kpp() and crypto_drop_kpp() pair for binding such a spawn to some inner kpp_alg and for releasing it respectively. Template implementations can instantiate transforms from the underlying kpp_alg by means of the new crypto_spawn_kpp(). Finally, provide the crypto_spawn_kpp_alg() helper for accessing a spawn's underlying kpp_alg during template instantiation. 
Annotate everything with proper kernel-doc comments, even though include/crypto/internal/kpp.h is not considered for the generated docs. Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/crypto/internal/kpp.h | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index c4d5a970fe9d..9cb0662ebe87 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -28,6 +28,20 @@ struct kpp_instance { }; }; +/** + * struct crypto_kpp_spawn - KPP algorithm spawn + * @base: Internal. Generic crypto core spawn state. + * + * Template instances can get a hold on some inner KPP algorithm by + * binding a &struct crypto_kpp_spawn via + * crypto_grab_kpp(). Transforms may subsequently get instantiated + * from the referenced inner &struct kpp_alg by means of + * crypto_spawn_kpp(). + */ +struct crypto_kpp_spawn { + struct crypto_spawn base; +}; + /* * Transform internal helpers. */ @@ -139,4 +153,65 @@ void crypto_unregister_kpp(struct kpp_alg *alg); int kpp_register_instance(struct crypto_template *tmpl, struct kpp_instance *inst); +/* + * KPP spawn related functions. + */ +/** + * crypto_grab_kpp() - Look up a KPP algorithm and bind a spawn to it. + * @spawn: The KPP spawn to bind. + * @inst: The template instance owning @spawn. + * @name: The KPP algorithm name to look up. + * @type: The type bitset to pass on to the lookup. + * @mask: The mask bismask to pass on to the lookup. + * Return: %0 on success, a negative error code otherwise. + */ +int crypto_grab_kpp(struct crypto_kpp_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask); + +/** + * crypto_drop_kpp() - Release a spawn previously bound via crypto_grab_kpp(). + * @spawn: The spawn to release. + */ +static inline void crypto_drop_kpp(struct crypto_kpp_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + +/** + * crypto_spawn_kpp_alg() - Get the algorithm a KPP spawn has been bound to. + * @spawn: The spawn to get the referenced &struct kpp_alg for. + * + * This function as well as the returned result are safe to use only + * after @spawn has been successfully bound via crypto_grab_kpp() and + * up to until the template instance owning @spawn has either been + * registered successfully or the spawn has been released again via + * crypto_drop_spawn(). + * + * Return: A pointer to the &struct kpp_alg referenced from the spawn. + */ +static inline struct kpp_alg *crypto_spawn_kpp_alg( + struct crypto_kpp_spawn *spawn) +{ + return container_of(spawn->base.alg, struct kpp_alg, base); +} + +/** + * crypto_spawn_kpp() - Create a transform from a KPP spawn. + * @spawn: The spawn previously bound to some &struct kpp_alg via + * crypto_grab_kpp(). + * + * Once a &struct crypto_kpp_spawn has been successfully bound to a + * &struct kpp_alg via crypto_grab_kpp(), transforms for the latter + * may get instantiated from the former by means of this function. + * + * Return: A pointer to the freshly created KPP transform on success + * or an ``ERR_PTR()`` otherwise. 
+ */ +static inline struct crypto_kpp *crypto_spawn_kpp( + struct crypto_kpp_spawn *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + #endif -- cgit v1.2.3 From 48c6d8b878c1d811015cfba1a302d8474a9aace6 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:49 +0100 Subject: crypto: dh - remove struct dh's ->q member The only current user of the DH KPP algorithm, the keyctl(KEYCTL_DH_COMPUTE) syscall, doesn't set the domain parameter ->q in struct dh. Remove it and any associated (de)serialization code in crypto_dh_encode_key() and crypto_dh_decode_key. Adjust the encoded ->secret values in testmgr's DH test vectors accordingly. Note that the dh-generic implementation would have initialized its struct dh_ctx's ->q from the decoded struct dh's ->q, if present. If this struct dh_ctx's ->q would ever have been non-NULL, it would have enabled a full key validation as specified in NIST SP800-56A in dh_is_pubkey_valid(). However, as outlined above, ->q is always NULL in practice and the full key validation code is effectively dead. A later patch will make dh_is_pubkey_valid() to calculate Q from P on the fly, if possible, so don't remove struct dh_ctx's ->q now, but leave it there until that has happened. Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/crypto/dh.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/crypto/dh.h b/include/crypto/dh.h index d71e9858ab86..2585f0e6bb69 100644 --- a/include/crypto/dh.h +++ b/include/crypto/dh.h @@ -24,21 +24,17 @@ * * @key: Private DH key * @p: Diffie-Hellman parameter P - * @q: Diffie-Hellman parameter Q * @g: Diffie-Hellman generator G * @key_size: Size of the private DH key * @p_size: Size of DH parameter P - * @q_size: Size of DH parameter Q * @g_size: Size of DH generator G */ struct dh { void *key; void *p; - void *q; void *g; unsigned int key_size; unsigned int p_size; - unsigned int q_size; unsigned int g_size; }; -- cgit v1.2.3 From 215bebc8c6ac438c382a6a56bd2764a2d4e1da72 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:50 +0100 Subject: crypto: dh - constify struct dh's pointer members struct dh contains several pointer members corresponding to DH parameters: ->key, ->p and ->g. A subsequent commit will introduce "dh" wrapping templates of the form "ffdhe2048(dh)", "ffdhe3072(dh)" and so on in order to provide built-in support for the well-known safe-prime ffdhe group parameters specified in RFC 7919. These templates will need to set the group parameter related members of the (serialized) struct dh instance passed to the inner "dh" kpp_alg instance, i.e. ->p and ->g, to some constant, static storage arrays. Turn the struct dh pointer members' types into "pointer to const" in preparation for this. 
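For illustration, a wrapping template could then point the group parameter members at static, read-only data roughly as follows; the array names and contents below are placeholders, not actual kernel symbols:

#include <linux/types.h>
#include <crypto/dh.h>

/* Placeholder data only; the real RFC 7919 prime bytes are omitted here. */
static const u8 ffdhe2048_p[256] = { 0xff };
static const u8 ffdhe2048_g[] = { 2 };

static void ffdhe2048_fill_group(struct dh *params)
{
        /* Legal only once ->p and ->g are "pointer to const". */
        params->p = ffdhe2048_p;
        params->p_size = sizeof(ffdhe2048_p);
        params->g = ffdhe2048_g;
        params->g_size = sizeof(ffdhe2048_g);
}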
Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/crypto/dh.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/dh.h b/include/crypto/dh.h index 2585f0e6bb69..67f3f6bca527 100644 --- a/include/crypto/dh.h +++ b/include/crypto/dh.h @@ -30,9 +30,9 @@ * @g_size: Size of DH generator G */ struct dh { - void *key; - void *p; - void *g; + const void *key; + const void *p; + const void *g; unsigned int key_size; unsigned int p_size; unsigned int g_size; -- cgit v1.2.3 From fae198935c442e09afa3ecca197e144f732068d7 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:51 +0100 Subject: crypto: dh - split out deserialization code from crypto_dh_decode() A subsequent commit will introduce "dh" wrapping templates of the form "ffdhe2048(dh)", "ffdhe3072(dh)" and so on in order to provide built-in support for the well-known safe-prime ffdhe group parameters specified in RFC 7919. Those templates' ->set_secret() will wrap the inner "dh" implementation's ->set_secret() and set the ->p and ->g group parameters as appropriate on the way inwards. More specifically, - A ffdheXYZ(dh) user would call crypto_dh_encode() on a struct dh instance having ->p == ->g == NULL as well as ->p_size == ->g_size == 0 and pass the resulting buffer to the outer ->set_secret(). - This outer ->set_secret() would then decode the struct dh via crypto_dh_decode_key(), set ->p, ->g, ->p_size as well as ->g_size as appropriate for the group in question and encode the struct dh again before passing it further down to the inner "dh"'s ->set_secret(). The problem is that crypto_dh_decode_key() implements some basic checks which would reject parameter sets with ->p_size == 0 and thus, the ffdheXYZ templates' ->set_secret() cannot use it as-is for decoding the passed buffer. As the inner "dh"'s ->set_secret() will eventually conduct said checks on the final parameter set anyway, the outer ->set_secret() really only needs the decoding functionality. Split out the pure struct dh decoding part from crypto_dh_decode_key() into the new __crypto_dh_decode_key(). __crypto_dh_decode_key() gets defined in crypto/dh_helper.c, but will have to get called from crypto/dh.c and thus, its declaration must be somehow made available to the latter. Strictly speaking, __crypto_dh_decode_key() is internal to the dh_generic module, yet it would be a bit over the top to introduce a new header like e.g. include/crypto/internal/dh.h containing just a single prototype. Add the __crypto_dh_decode_key() declaration to include/crypto/dh.h instead. Provide a proper kernel-doc annotation, even though __crypto_dh_decode_key() is purposedly not on the function list specified in Documentation/crypto/api-kpp.rst. 
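As a rough sketch of that flow (the helper below and its parameters are illustrative only, not part of this patch):

#include <crypto/dh.h>

static int ffdhe_prepare_inner_key(const char *outer, unsigned int outer_len,
                                   char *inner, unsigned int inner_len,
                                   const void *group_p, unsigned int p_size,
                                   const void *group_g, unsigned int g_size)
{
        struct dh params;
        int err;

        /* Decode without the basic checks; for an ffdheXYZ(dh) user the
         * group members ->p/->g come back as NULL with size 0.
         */
        err = __crypto_dh_decode_key(outer, outer_len, &params);
        if (err)
                return err;

        /* Fill in the group parameters for this template instance... */
        params.p = group_p;
        params.p_size = p_size;
        params.g = group_g;
        params.g_size = g_size;

        /* ...and re-encode for the inner "dh" ->set_secret(), which
         * performs the full parameter checks.
         */
        return crypto_dh_encode_key(inner, inner_len, &params);
}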
Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/crypto/dh.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/crypto/dh.h b/include/crypto/dh.h index 67f3f6bca527..7b863e911cb4 100644 --- a/include/crypto/dh.h +++ b/include/crypto/dh.h @@ -79,4 +79,20 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params); */ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params); +/** + * __crypto_dh_decode_key() - decode a private key without parameter checks + * @buf: Buffer holding a packet key that should be decoded + * @len: Length of the packet private key buffer + * @params: Buffer allocated by the caller that is filled with the + * unpacked DH private key. + * + * Internal function providing the same services as the exported + * crypto_dh_decode_key(), but without any of those basic parameter + * checks conducted by the latter. + * + * Return: -EINVAL if buffer has insufficient size, 0 on success + */ +int __crypto_dh_decode_key(const char *buf, unsigned int len, + struct dh *params); + #endif -- cgit v1.2.3 From d6097b8d5d55f26cd2244e7e7f00a5a077772a91 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 21 Feb 2022 13:10:58 +0100 Subject: crypto: api - allow algs only in specific constructions in FIPS mode Currently we do not distinguish between algorithms that fail on the self-test vs. those which are disabled in FIPS mode (not allowed). Both are marked as having failed the self-test. Recently the need arose to allow the usage of certain algorithms only as arguments to specific template instantiations in FIPS mode. For example, standalone "dh" must be blocked, but e.g. "ffdhe2048(dh)" is allowed. Other potential use cases include "cbcmac(aes)", which must only be used with ccm(), or "ghash", which must be used only for gcm(). This patch allows this scenario by adding a new flag FIPS_INTERNAL to indicate those algorithms that are not FIPS-allowed. They can then be used as template arguments only, i.e. when looked up via crypto_grab_spawn() to be more specific. The FIPS_INTERNAL bit gets propagated upwards recursively into the surrounding template instances, until the construction eventually matches an explicit testmgr entry with ->fips_allowed being set, if any. The behaviour to skip !->fips_allowed self-test executions in FIPS mode will be retained. Note that this effectively means that FIPS_INTERNAL algorithms are handled very similarly to the INTERNAL ones in this regard. It is expected that the FIPS_INTERNAL algorithms will receive sufficient testing when the larger constructions they're a part of, if any, get exercised by testmgr. Note that as a side-effect of this patch algorithms which are not FIPS-allowed will now return ENOENT instead of ELIBBAD. Hopefully this is not an issue as some people were relying on this already. 
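For illustration, assuming a later patch in the series marks the standalone "dh" implementation as FIPS-internal while an "ffdhe2048(dh)" testmgr entry has ->fips_allowed set, the user-visible effect under fips_enabled would be roughly:

#include <crypto/kpp.h>
#include <linux/err.h>

static void fips_internal_example(void)
{
        struct crypto_kpp *tfm;

        /* Standalone "dh" is only reachable as a template argument,
         * so a direct lookup is expected to fail (now with -ENOENT).
         */
        tfm = crypto_alloc_kpp("dh", 0, 0);
        if (!IS_ERR(tfm))
                crypto_free_kpp(tfm);   /* not expected in FIPS mode */

        /* The approved construction wrapping it still resolves. */
        tfm = crypto_alloc_kpp("ffdhe2048(dh)", 0, 0);
        if (!IS_ERR(tfm))
                crypto_free_kpp(tfm);
}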
Link: https://lore.kernel.org/r/YeEVSaMEVJb3cQkq@gondor.apana.org.au Originally-by: Herbert Xu Signed-off-by: Nicolai Stange Reviewed-by: Hannes Reinecke Signed-off-by: Herbert Xu --- include/linux/crypto.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 855869e1fd32..2324ab6f1846 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -132,6 +132,15 @@ */ #define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000 +/* + * Mark an algorithm as a service implementation only usable by a + * template and never by a normal user of the kernel crypto API. + * This is intended to be used by algorithms that are themselves + * not FIPS-approved but may instead be used to implement parts of + * a FIPS-approved algorithm (e.g., dh vs. ffdhe2048(dh)). + */ +#define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From 7976c1492571a5fb234c416559a0d9790855c635 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Feb 2022 08:07:01 +0100 Subject: crypto: crypto_xor - use helpers for unaligned accesses Dereferencing a misaligned pointer is undefined behavior in C, and may result in codegen on architectures such as ARM that trigger alignments traps and expensive fixups in software. Instead, use the get_aligned()/put_aligned() accessors, which are cheap or even completely free when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y. In the converse case, the prior alignment checks ensure that the casts are safe, and so no unaligned accessors are necessary. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f76ec723ceae..f50c5d1725da 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -13,6 +13,8 @@ #include #include +#include + /* * Maximum values for blocksize and alignmask, used to allocate * static buffers that are big enough for any combination of @@ -154,9 +156,11 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) (size % sizeof(unsigned long)) == 0) { unsigned long *d = (unsigned long *)dst; unsigned long *s = (unsigned long *)src; + unsigned long l; while (size > 0) { - *d++ ^= *s++; + l = get_unaligned(d) ^ get_unaligned(s++); + put_unaligned(l, d++); size -= sizeof(unsigned long); } } else { @@ -173,9 +177,11 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, unsigned long *d = (unsigned long *)dst; unsigned long *s1 = (unsigned long *)src1; unsigned long *s2 = (unsigned long *)src2; + unsigned long l; while (size > 0) { - *d++ = *s1++ ^ *s2++; + l = get_unaligned(s1++) ^ get_unaligned(s2++); + put_unaligned(l, d++); size -= sizeof(unsigned long); } } else { -- cgit v1.2.3 From 80f940ef527efdd8e36c69f7b5ee8e07ac8891d9 Mon Sep 17 00:00:00 2001 From: Harsha Date: Wed, 23 Feb 2022 16:05:02 +0530 Subject: firmware: xilinx: Add ZynqMP SHA API for SHA3 functionality This patch adds zynqmp_pm_sha_hash API in the ZynqMP firmware to compute SHA3 hash of given data. 
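A minimal usage sketch follows; the flags semantics are defined by the firmware interface rather than by this header, so their value is left to the caller here as an assumption:

#include <linux/printk.h>
#include <linux/firmware/xlnx-zynqmp.h>

static int example_sha3_hash(u64 dma_addr, u32 size, u32 flags)
{
        int ret = zynqmp_pm_sha_hash(dma_addr, size, flags);

        /* The inline stub used when the firmware driver is not built
         * returns -ENODEV, as shown in the hunk below.
         */
        if (ret == -ENODEV)
                pr_warn("ZynqMP firmware interface not available\n");
        return ret;
}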
Signed-off-by: Harsha Signed-off-by: Kalyani Akula Acked-by: Michal Simek Signed-off-by: Herbert Xu --- include/linux/firmware/xlnx-zynqmp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 907cb01890cf..f6783f58c64a 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -93,6 +93,7 @@ enum pm_api_id { PM_FPGA_LOAD = 22, PM_FPGA_GET_STATUS = 23, PM_GET_CHIPID = 24, + PM_SECURE_SHA = 26, PM_PINCTRL_REQUEST = 28, PM_PINCTRL_RELEASE = 29, PM_PINCTRL_GET_FUNCTION = 30, @@ -427,6 +428,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); int zynqmp_pm_write_ggs(u32 index, u32 value); @@ -601,6 +603,12 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) return -ENODEV; } +static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, + const u32 flags) +{ + return -ENODEV; +} + static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { -- cgit v1.2.3 From d58b8a99cbb84c1eb3b3613d23c1a328695a9455 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 25 Feb 2022 12:33:51 -0500 Subject: drm/amdkfd: Add SMI add event helper To remove duplicate code, unify event message format and simplify new event add in the following patches. Use KFD_SMI_EVENT_MSG_SIZE to define msg size, the same size will be used in user space to alloc the msg receive buffer. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index baec5a41de3e..b40687bf1014 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -463,6 +463,7 @@ enum kfd_smi_event { }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) +#define KFD_SMI_EVENT_MSG_SIZE 96 struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ -- cgit v1.2.3 From 4f9a7a1dc2a294c5c5c4b0246e2281e6ec88fb91 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 2 Mar 2022 11:29:14 +0000 Subject: OPP: Add "opp-microwatt" supporting code Add new property to the OPP: power value. The OPP entry in the DT can have "opp-microwatt". Add the needed code to handle this new property in the existing infrastructure. Signed-off-by: Lukasz Luba Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 879c138c7b8e..0d85a63a1f78 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -32,14 +32,17 @@ enum dev_pm_opp_event { * @u_volt_min: Minimum voltage in microvolts corresponding to this OPP * @u_volt_max: Maximum voltage in microvolts corresponding to this OPP * @u_amp: Maximum current drawn by the device in microamperes + * @u_watt: Power used by the device in microwatts * - * This structure stores the voltage/current values for a single power supply. + * This structure stores the voltage/current/power values for a single power + * supply. 
*/ struct dev_pm_opp_supply { unsigned long u_volt; unsigned long u_volt_min; unsigned long u_volt_max; unsigned long u_amp; + unsigned long u_watt; }; /** @@ -94,6 +97,8 @@ void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); +unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); + unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); @@ -186,6 +191,11 @@ static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) return 0; } +static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) +{ + return 0; +} + static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) { return 0; -- cgit v1.2.3 From caeea9e6671984c3865918459d756b961a24bb49 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 2 Mar 2022 11:29:15 +0000 Subject: PM: EM: add macro to set .active_power() callback conditionally The Energy Model is able to use new power values coming from DT. Add a new macro which is helpful in setting the .active_power() callback conditionally in setup time. The dual-macro implementation handles both kernel configurations: w/ EM and w/o EM built-in. Reported-by: kernel test robot Signed-off-by: Lukasz Luba Signed-off-by: Viresh Kumar --- include/linux/energy_model.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 6377adc3b78d..9f3c400bc52d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -116,6 +116,7 @@ struct em_data_callback { struct device *dev); }; #define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } +#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb) struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); @@ -264,6 +265,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) #else struct em_data_callback {}; #define EM_DATA_CB(_active_power_cb) { } +#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0) static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, -- cgit v1.2.3 From bf08824a0f4776fc0626b82b6924fa1a5643eacb Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Mon, 28 Feb 2022 20:58:56 +0100 Subject: flow_dissector: Add support for HSR Network drivers such as igb or igc call eth_get_headlen() to determine the header length for their to be constructed skbs in receive path. When running HSR on top of these drivers, it results in triggering BUG_ON() in skb_pull(). The reason is the skb headlen is not sufficient for HSR to work correctly. skb_pull() notices that. For instance, eth_get_headlen() returns 14 bytes for TCP traffic over HSR which is not correct. The problem is, the flow dissection code does not take HSR into account. Therefore, add support for it. Reported-by: Anthony Harivel Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20220228195856.88187-1-kurt@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/if_hsr.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 38bbc537d4e4..408539d5ea5f 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -9,6 +9,22 @@ enum hsr_version { PRP_V1, }; +/* HSR Tag. + * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, + * path, LSDU_size, sequence Nr }. 
But we let eth_header() create { h_dest, + * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, + * encapsulated protocol } instead. + * + * Field names as defined in the IEC:2010 standard for HSR. + */ +struct hsr_tag { + __be16 path_and_LSDU_size; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + +#define HSR_HLEN 6 + #if IS_ENABLED(CONFIG_HSR) extern bool is_hsr_master(struct net_device *dev); extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); -- cgit v1.2.3 From 42f0c1934c7cb3e94c2fe8f5771245fa5631d0e7 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 1 Mar 2022 06:35:42 -0800 Subject: tcp: Remove the unused api Last tcp_write_queue_head() use was removed in commit 114f39feab36 ("tcp: restore autocorking"), so remove it. Signed-off-by: Tao Chen Link: https://lore.kernel.org/r/SYZP282MB33317DEE1253B37C0F57231E86029@SYZP282MB3331.AUSP282.PROD.OUTLOOK.COM Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 479a27777ad6..d486d7b6112d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1831,11 +1831,6 @@ static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) return skb_rb_last(&sk->tcp_rtx_queue); } -static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); -- cgit v1.2.3 From 8610037e8106b48c79cfe0afb92b2b2466e51c3d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:47 -0800 Subject: page_pool: Add allocation stats Add per-pool statistics counters for the allocation path of a page pool. These stats are incremented in softirq context, so no locking or per-cpu variables are needed. This code is disabled by default and a kernel config option is provided for users who wish to enable them. The statistics added are: - fast: successful fast path allocations - slow: slow path order-0 allocations - slow_high_order: slow path high order allocations - empty: ptr ring is empty, so a slow path allocation was forced. - refill: an allocation which triggered a refill of the cache - waive: pages obtained from the ptr ring that cannot be added to the cache due to a NUMA mismatch. Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. 
Miller --- include/net/page_pool.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 97c3c19872ff..1f27e8a48830 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -84,6 +84,19 @@ struct page_pool_params { void *init_arg; }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; +#endif + struct page_pool { struct page_pool_params p; @@ -96,6 +109,11 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif u32 xdp_mem_id; /* -- cgit v1.2.3 From ad6fa1e1ab1b8164f1ba296b1b4dc556a483bcad Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:48 -0800 Subject: page_pool: Add recycle stats Add per-cpu stats tracking page pool recycling events: - cached: recycling placed page in the page pool cache - cache_full: page pool cache was full - ring: page placed into the ptr ring - ring_full: page released from page pool because the ptr ring was full - released_refcnt: page released (and not recycled) because refcnt > 1 Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. Miller --- include/net/page_pool.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 1f27e8a48830..298af95bbf96 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -95,6 +95,18 @@ struct page_pool_alloc_stats { u64 refill; /* allocations via successful refill */ u64 waive; /* failed refills due to numa zone mismatch */ }; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; #endif struct page_pool { @@ -144,6 +156,10 @@ struct page_pool { */ struct ptr_ring ring; +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif atomic_t pages_state_release_cnt; /* A page_pool is strictly tied to a single RX-queue being -- cgit v1.2.3 From 6b95e3388b1ea0ca63500c5a6e39162dbf828433 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:49 -0800 Subject: page_pool: Add function to batch and return stats Adds a function page_pool_get_stats which can be used by drivers to obtain stats for a specified page_pool. Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. 
Miller --- include/net/page_pool.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 298af95bbf96..ea5fb70e5101 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -107,6 +107,23 @@ struct page_pool_recycle_stats { * refcnt */ }; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); #endif struct page_pool { -- cgit v1.2.3 From 46efc97b73060823fdc18103a5e317a8327d44e1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:17 +0200 Subject: net: rtnetlink: RTM_GETSTATS: Allow filtering inside nests The filter_mask field of RTM_GETSTATS header determines which top-level attributes should be included in the netlink response. This saves processing time by only including the bits that the user cares about instead of always dumping everything. This is doubly important for HW-backed statistics that would typically require a trip to the device to fetch the stats. So far there was only one HW-backed stat suite per attribute. However, IFLA_STATS_LINK_OFFLOAD_XSTATS is a nest, and will gain a new stat suite in the following patches. It would therefore be advantageous to be able to filter within that nest, and select just one or the other HW-backed statistics suite. Extend rtnetlink so that RTM_GETSTATS permits attributes in the payload. The scheme is as follows: - RTM_GETSTATS - struct if_stats_msg - attr nest IFLA_STATS_GET_FILTERS - attr IFLA_STATS_LINK_OFFLOAD_XSTATS - u32 filter_mask This scheme reuses the existing enumerators by nesting them in a dedicated context attribute. This is covered by policies as usual, therefore a gradual opt-in is possible. Currently only IFLA_STATS_LINK_OFFLOAD_XSTATS nest has filtering enabled, because for the SW counters the issue does not seem to be that important. rtnl_offload_xstats_get_size() and _fill() are extended to observe the requested filters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e315e53125f4..4d62ea6e1288 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1207,6 +1207,16 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. 
+ */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] -- cgit v1.2.3 From 9309f97aef6d8250bb484dabeac925c3a7c57716 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:20 +0200 Subject: net: dev: Add hardware stats support Offloading switch device drivers may be able to collect statistics of the traffic taking place in the HW datapath that pertains to a certain soft netdevice, such as VLAN. Add the necessary infrastructure to allow exposing these statistics to the offloaded netdevice in question. The API was shaped by the following considerations: - Collection of HW statistics is not free: there may be a finite number of counters, and the act of counting may have a performance impact. It is therefore necessary to allow toggling whether HW counting should be done for any particular SW netdevice. - As the drivers are loaded and removed, a particular device may get offloaded and unoffloaded again. At the same time, the statistics values need to stay monotonic (modulo the eventual 64-bit wraparound), increasing only to reflect traffic measured in the device. To that end, the netdevice keeps around a lazily-allocated copy of struct rtnl_link_stats64. Device drivers then contribute to the values kept therein at various points. Even as the driver goes away, the struct stays around to maintain the statistics values. - Different HW devices may be able to count different things. The motivation behind this patch in particular is exposure of HW counters on Nvidia Spectrum switches, where the only practical approach to counting traffic on offloaded soft netdevices currently is to use router interface counters, and count L3 traffic. Correspondingly that is the statistics suite added in this patch. Other devices may be able to measure different kinds of traffic, and for that reason, the APIs are built to allow uniform access to different statistics suites. - Because soft netdevices and offloading drivers are only loosely bound, a netdevice uses a notifier chain to communicate with the drivers. Several new notifiers, NETDEV_OFFLOAD_XSTATS_*, have been added to carry messages to the offloading drivers. - Devices can have various conditions for when a particular counter is available. As the device is configured and reconfigured, the device offload may become or cease being suitable for counter binding. A netdevice can use a notifier type NETDEV_OFFLOAD_XSTATS_REPORT_USED to ping offloading drivers and determine whether anyone currently implements a given statistics suite. This information can then be propagated to user space. When the driver decides to unoffload a netdevice, it can use a newly-added function, netdev_offload_xstats_report_delta(), to record outstanding collected statistics, before destroying the HW counter. This patch adds a helper, call_netdevice_notifiers_info_robust(), for dispatching a notifier with the possibility of unwind when one of the consumers bails. Given the wish to eventually get rid of the global notifier block altogether, this helper only invokes the per-netns notifier block. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 15 +++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c79ee2296296..19a27ac361ef 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1950,6 +1950,7 @@ enum netdev_ml_priv_type { * @watchdog_dev_tracker: refcount tracker used by watchdog. * @dev_registered_tracker: tracker for reference held while * registered + * @offload_xstats_l3: L3 HW stats for this netdevice. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2287,6 +2288,7 @@ struct net_device { netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; + struct rtnl_hw_stats64 *offload_xstats_l3; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2727,6 +2729,10 @@ enum netdev_cmd { NETDEV_CVLAN_FILTER_DROP_INFO, NETDEV_SVLAN_FILTER_PUSH_INFO, NETDEV_SVLAN_FILTER_DROP_INFO, + NETDEV_OFFLOAD_XSTATS_ENABLE, + NETDEV_OFFLOAD_XSTATS_DISABLE, + NETDEV_OFFLOAD_XSTATS_REPORT_USED, + NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); @@ -2777,6 +2783,42 @@ struct netdev_notifier_pre_changeaddr_info { const unsigned char *dev_addr; }; +enum netdev_offload_xstats_type { + NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1, +}; + +struct netdev_notifier_offload_xstats_info { + struct netdev_notifier_info info; /* must be first */ + enum netdev_offload_xstats_type type; + + union { + /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */ + struct netdev_notifier_offload_xstats_rd *report_delta; + /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */ + struct netdev_notifier_offload_xstats_ru *report_used; + }; +}; + +int netdev_offload_xstats_enable(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct netlink_ext_ack *extack); +int netdev_offload_xstats_disable(struct net_device *dev, + enum netdev_offload_xstats_type type); +bool netdev_offload_xstats_enabled(const struct net_device *dev, + enum netdev_offload_xstats_type type); +int netdev_offload_xstats_get(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_hw_stats64 *stats, bool *used, + struct netlink_ext_ack *extack); +void +netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd, + const struct rtnl_hw_stats64 *stats); +void +netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru); +void netdev_offload_xstats_push_delta(struct net_device *dev, + enum netdev_offload_xstats_type type, + const struct rtnl_hw_stats64 *stats); + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4d62ea6e1288..ef6a62a2e15d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -245,6 +245,21 @@ struct rtnl_link_stats64 { __u64 rx_nohandler; }; +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. 
+ */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; +}; + /* The struct should be in sync with struct ifmap */ struct rtnl_link_ifmap { __u64 mem_start; -- cgit v1.2.3 From 0e7788fd76222dba3229eada9162efab185923fc Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:21 +0200 Subject: net: rtnetlink: Add UAPI for obtaining L3 offload xstats Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute, IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes place in a HW router. The offloaded HW stats are designed to allow per-netdevice enablement and disablement. Additionally, as a netdevice is configured, it may become or cease being suitable for binding of a HW counter. Both of these aspects need to be communicated to the userspace. To that end, add another child attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO: - attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO - attr nest IFLA_OFFLOAD_XSTATS_L3_STATS - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST - {0,1} as u8 - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED - {0,1} as u8 Thus this one attribute is a nest that can be used to carry information about various types of HW statistics, and indexing is very simply done by wrapping the information for a given statistics suite into the attribute that carries the suite is the RTM_GETSTATS query. At the same time, because _HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is possible through filtering to request only the metadata about individual statistics suites, without having to hit the HW to get the actual counters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ef6a62a2e15d..b1031f481d2f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1249,10 +1249,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -- cgit v1.2.3 From 03ba35667091337d8e632cf4b814f1c1b914609b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:22 +0200 Subject: net: rtnetlink: Add RTM_SETSTATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. These stats are only accessible through RTM_GETSTATS, and therefore should be toggled by a RTM_SETSTATS message. Add it, and the necessary skeleton handler. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0970cb4b1b88..14462dc159fd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -146,6 +146,8 @@ enum { #define RTM_NEWSTATS RTM_NEWSTATS RTM_GETSTATS = 94, #define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS RTM_NEWCACHEREPORT = 96, #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT -- cgit v1.2.3 From 5fd0b838efac16046509f7fb100455d0463b9687 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:23 +0200 Subject: net: rtnetlink: Add UAPI toggle for IFLA_OFFLOAD_XSTATS_L3_STATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. Add an attribute, IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, which should be carried by the RTM_SETSTATS message, and expresses a desire to toggle L3 offload xstats on or off. As part of the above, add an exported function rtnl_offload_xstats_notify() that drivers can use when they have installed or deinstalled the counters backing the HW stats. At this point, it is possible to enable, disable and query L3 offload xstats on netdevices. (However there is no driver actually implementing these.) Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +++ include/uapi/linux/if_link.h | 1 + include/uapi/linux/rtnetlink.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bb9cb84114c1..7f970b16da3a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -134,4 +134,7 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); + +extern void rtnl_offload_xstats_notify(struct net_device *dev); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b1031f481d2f..ddca20357e7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1227,6 +1227,7 @@ enum { IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with * a filter mask for the corresponding group. */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ __IFLA_STATS_GETSET_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 14462dc159fd..51530aade46e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -767,6 +767,8 @@ enum rtnetlink_groups { #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR RTNLGRP_TUNNEL, #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 445ad495f0ff71553693e6a2a9d7a3bc6917ca36 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:17 +0200 Subject: vfio: Have the core code decode the VFIO_DEVICE_FEATURE ioctl Invoke a new device op 'device_feature' to handle just the data array portion of the command. This lifts the ioctl validation to the core code and makes it simpler for either the core code, or layered drivers, to implement their own feature values. Provide vfio_check_feature() to consolidate checking the flags/etc against what the driver supports. 
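For illustration, a driver's ->device_feature() op could use the helper roughly as follows; the payload struct and the GET-only policy are placeholders, not part of this patch:

#include <linux/uaccess.h>
#include <linux/vfio.h>

struct my_feature_payload {
        u32 value;                      /* placeholder field */
};

static int my_device_feature(struct vfio_device *device, u32 flags,
                             void __user *arg, size_t argsz)
{
        struct my_feature_payload payload = { 0 };
        int ret;

        ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
                                 sizeof(payload));
        if (ret != 1)
                return ret;             /* PROBE answered (0) or error */

        /* Fill "payload" from device state before copying it out. */
        return copy_to_user(arg, &payload, sizeof(payload)) ? -EFAULT : 0;
}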
Link: https://lore.kernel.org/all/20220224142024.147653-9-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/vfio.h | 32 ++++++++++++++++++++++++++++++++ include/linux/vfio_pci_core.h | 2 ++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 76191d7abed1..550c28f2ef60 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -55,6 +55,7 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl */ struct vfio_device_ops { char *name; @@ -69,8 +70,39 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*device_feature)(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz); }; +/** + * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl + * @flags: Arg from the device_feature op + * @argsz: Arg from the device_feature op + * @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver + * supports + * @minsz: Minimum data size the driver accepts + * + * For use in a driver's device_feature op. Checks that the inputs to the + * VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if + * the driver should execute the get or set, otherwise the relevant + * value should be returned. + */ +static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, + size_t minsz) +{ + if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) & + ~supported_ops) + return -EINVAL; + if (flags & VFIO_DEVICE_FEATURE_PROBE) + return 0; + /* Without PROBE one of GET or SET must be requested */ + if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET))) + return -EINVAL; + if (argsz < minsz) + return -EINVAL; + return 1; +} + void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); void vfio_uninit_group_dev(struct vfio_device *device); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index ef9a44b6cf5d..beba0b2ed87d 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -220,6 +220,8 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); extern const struct pci_error_handlers vfio_pci_core_err_handlers; long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg); +int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz); ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos); ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, -- cgit v1.2.3 From 115dcec65f61d53e25e1bed5e380468b30f98b14 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:18 +0200 Subject: vfio: Define device migration protocol v2 Replace the existing region based migration protocol with an ioctl based protocol. The two protocols have the same general semantic behaviors, but the way the data is transported is changed. 
This is the STOP_COPY portion of the new protocol, it defines the 5 states for basic stop and copy migration and the protocol to move the migration data in/out of the kernel. Compared to the clarification of the v1 protocol Alex proposed: https://lore.kernel.org/r/163909282574.728533.7460416142511440919.stgit@omen This has a few deliberate functional differences: - ERROR arcs allow the device function to remain unchanged. - The protocol is not required to return to the original state on transition failure. Instead userspace can execute an unwind back to the original state, reset, or do something else without needing kernel support. This simplifies the kernel design and should userspace choose a policy like always reset, avoids doing useless work in the kernel on error handling paths. - PRE_COPY is made optional, userspace must discover it before using it. This reflects the fact that the majority of drivers we are aware of right now will not implement PRE_COPY. - segmentation is not part of the data stream protocol, the receiver does not have to reproduce the framing boundaries. The hybrid FSM for the device_state is described as a Mealy machine by documenting each of the arcs the driver is required to implement. Defining the remaining set of old/new device_state transitions as 'combination transitions' which are naturally defined as taking multiple FSM arcs along the shortest path within the FSM's digraph allows a complete matrix of transitions. A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE is defined to replace writing to the device_state field in the region. This allows returning a brand new FD whenever the requested transition opens a data transfer session. The VFIO core code implements the new feature and provides a helper function to the driver. Using the helper the driver only has to implement 6 of the FSM arcs and the other combination transitions are elaborated consistently from those arcs. A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIGRATION is defined to report the capability for migration and indicate which set of states and arcs are supported by the device. The FSM provides a lot of flexibility to make backwards compatible extensions but the VFIO_DEVICE_FEATURE also allows for future breaking extensions for scenarios that cannot support even the basic STOP_COPY requirements. The VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE with the GET option (i.e. VFIO_DEVICE_FEATURE_GET) can be used to read the current migration state of the VFIO device. Data transfer sessions are now carried over a file descriptor, instead of the region. The FD functions for the lifetime of the data transfer session. read() and write() transfer the data with normal Linux stream FD semantics. This design allows future expansion to support poll(), io_uring, and other performance optimizations. The complicated mmap mode for data transfer is discarded as current qemu doesn't take meaningful advantage of it, and the new qemu implementation avoids substantially all the performance penalty of using a read() on the region. 
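A rough userspace sketch of the GET side (illustrative only, minimal error handling; the argsz/flags/data container layout comes from the existing VFIO_DEVICE_FEATURE uapi):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int query_mig_state(int device_fd, uint32_t *state)
{
        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_feature_mig_state) + 7) / 8] = { 0 };
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_feature_mig_state *mig = (void *)feature->data;

        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_GET |
                         VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
                return -1;

        *state = mig->device_state;     /* data_fd is -1 on a plain GET */
        return 0;
}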
Link: https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/vfio.h | 20 ++++++ include/uapi/linux/vfio.h | 174 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 181 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 550c28f2ef60..c44e80bbbd3b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -56,6 +56,16 @@ struct vfio_device { * match, -errno for abort (ex. match with insufficient or incorrect * additional args) * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl + * @migration_set_state: Optional callback to change the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * The returned FD is used for data transfer according to the FSM + * definition. The driver is responsible to ensure that FD reaches end + * of stream or error whenever the migration FSM leaves a data transfer + * state or before close_device() returns. + * @migration_get_state: Optional callback to get the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. */ struct vfio_device_ops { char *name; @@ -72,6 +82,11 @@ struct vfio_device_ops { int (*match)(struct vfio_device *vdev, char *buf); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); + struct file *(*migration_set_state)( + struct vfio_device *device, + enum vfio_device_mig_state new_state); + int (*migration_get_state)(struct vfio_device *device, + enum vfio_device_mig_state *curr_state); }; /** @@ -114,6 +129,11 @@ extern void vfio_device_put(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +int vfio_mig_get_next_state(struct vfio_device *device, + enum vfio_device_mig_state cur_fsm, + enum vfio_device_mig_state new_fsm, + enum vfio_device_mig_state *next_fsm); + /* * External user API */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ef33ea002b0b..22ed358c04c5 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -605,25 +605,25 @@ struct vfio_region_gfx_edid { struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -#define VFIO_DEVICE_STATE_STOP (0) -#define VFIO_DEVICE_STATE_RUNNING (1 << 0) -#define VFIO_DEVICE_STATE_SAVING (1 << 1) -#define VFIO_DEVICE_STATE_RESUMING (1 << 2) -#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ - VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) +#define VFIO_DEVICE_STATE_V1_STOP (0) +#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \ + VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) #define VFIO_DEVICE_STATE_VALID(state) \ - (state & VFIO_DEVICE_STATE_RESUMING ? \ - (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + (state & VFIO_DEVICE_STATE_V1_RESUMING ? 
\ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1) #define VFIO_DEVICE_STATE_IS_ERROR(state) \ - ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING)) + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING)) #define VFIO_DEVICE_STATE_SET_ERROR(state) \ - ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) __u32 reserved; __u64 pending_bytes; @@ -1002,6 +1002,154 @@ struct vfio_device_feature { */ #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) +/* + * Indicates the device can support the migration API through + * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and + * ERROR states are always supported. Support for additional states is + * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be + * set. + * + * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and + * RESUMING are supported. + */ +struct vfio_device_feature_migration { + __aligned_u64 flags; +#define VFIO_MIGRATION_STOP_COPY (1 << 0) +}; +#define VFIO_DEVICE_FEATURE_MIGRATION 1 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO + * device. The new state is supplied in device_state, see enum + * vfio_device_mig_state for details + * + * The kernel migration driver must fully transition the device to the new state + * value before the operation returns to the user. + * + * The kernel migration driver must not generate asynchronous device state + * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET + * ioctl as described above. + * + * If this function fails then current device_state may be the original + * operating state or some other state along the combination transition path. + * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt + * to return to the original state, or attempt to return to some other state + * such as RUNNING or STOP. + * + * If the new_state starts a new data transfer session then the FD associated + * with that session is returned in data_fd. The user is responsible to close + * this FD when it is finished. The user must consider the migration data stream + * carried over the FD to be opaque and must preserve the byte order of the + * stream. The user is not required to preserve buffer segmentation when writing + * the data stream during the RESUMING operation. + * + * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO + * device, data_fd will be -1. + */ +struct vfio_device_feature_mig_state { + __u32 device_state; /* From enum vfio_device_mig_state */ + __s32 data_fd; +}; +#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2 + +/* + * The device migration Finite State Machine is described by the enum + * vfio_device_mig_state. Some of the FSM arcs will create a migration data + * transfer session by returning a FD, in this case the migration data will + * flow over the FD using read() and write() as discussed below. 
+ * + * There are 5 states to support VFIO_MIGRATION_STOP_COPY: + * RUNNING - The device is running normally + * STOP - The device does not change the internal or external state + * STOP_COPY - The device internal state can be read out + * RESUMING - The device is stopped and is loading a new internal state + * ERROR - The device has failed and must be reset + * + * The FSM takes actions on the arcs between FSM states. The driver implements + * the following behavior for the FSM arcs: + * + * RUNNING -> STOP + * STOP_COPY -> STOP + * While in STOP the device must stop the operation of the device. The device + * must not generate interrupts, DMA, or any other change to external state. + * It must not change its internal state. When stopped the device and kernel + * migration driver must accept and respond to interaction to support external + * subsystems in the STOP state, for example PCI MSI-X and PCI config space. + * Failure by the user to restrict device access while in STOP must not result + * in error conditions outside the user context (ex. host system faults). + * + * The STOP_COPY arc will terminate a data transfer session. + * + * RESUMING -> STOP + * Leaving RESUMING terminates a data transfer session and indicates the + * device should complete processing of the data delivered by write(). The + * kernel migration driver should complete the incorporation of data written + * to the data transfer FD into the device internal state and perform + * final validity and consistency checking of the new device state. If the + * user provided data is found to be incomplete, inconsistent, or otherwise + * invalid, the migration driver must fail the SET_STATE ioctl and + * optionally go to the ERROR state as described below. + * + * While in STOP the device has the same behavior as other STOP states + * described above. + * + * To abort a RESUMING session the device must be reset. + * + * STOP -> RUNNING + * While in RUNNING the device is fully operational, the device may generate + * interrupts, DMA, respond to MMIO, all vfio device regions are functional, + * and the device may advance its internal state. + * + * STOP -> STOP_COPY + * This arc begin the process of saving the device state and will return a + * new data_fd. + * + * While in the STOP_COPY state the device has the same behavior as STOP + * with the addition that the data transfers session continues to stream the + * migration state. End of stream on the FD indicates the entire device + * state has been transferred. + * + * The user should take steps to restrict access to vfio device regions while + * the device is in STOP_COPY or risk corruption of the device migration data + * stream. + * + * STOP -> RESUMING + * Entering the RESUMING state starts a process of restoring the device state + * and will return a new data_fd. The data stream fed into the data_fd should + * be taken from the data transfer output of a single FD during saving from + * a compatible device. The migration driver may alter/reset the internal + * device state for this arc if required to prepare the device to receive the + * migration data. + * + * any -> ERROR + * ERROR cannot be specified as a device state, however any transition request + * can be failed with an errno return and may then move the device_state into + * ERROR. In this case the device was unable to execute the requested arc and + * was also unable to restore the device to any valid device_state. 
+ * To recover from ERROR VFIO_DEVICE_RESET must be used to return the + * device_state back to RUNNING. + * + * The remaining possible transitions are interpreted as combinations of the + * above FSM arcs. As there are multiple paths through the FSM arcs the path + * should be selected based on the following rules: + * - Select the shortest path. + * Refer to vfio_mig_get_next_state() for the result of the algorithm. + * + * The automatic transit through the FSM arcs that make up the combination + * transition is invisible to the user. When working with combination arcs the + * user may see any step along the path in the device_state if SET_STATE + * fails. When handling these types of errors users should anticipate future + * revisions of this protocol using new states and those states becoming + * visible in this case. + */ +enum vfio_device_mig_state { + VFIO_DEVICE_STATE_ERROR = 0, + VFIO_DEVICE_STATE_STOP = 1, + VFIO_DEVICE_STATE_RUNNING = 2, + VFIO_DEVICE_STATE_STOP_COPY = 3, + VFIO_DEVICE_STATE_RESUMING = 4, +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 8cb3d83b959be0631cd719b995c40c3cda21cd47 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:19 +0200 Subject: vfio: Extend the device migration protocol with RUNNING_P2P The RUNNING_P2P state is designed to support multiple devices in the same VM that are doing P2P transactions between themselves. When in RUNNING_P2P the device must be able to accept incoming P2P transactions but should not generate outgoing P2P transactions. As an optional extension to the mandatory states it is defined as in between STOP and RUNNING: STOP -> RUNNING_P2P -> RUNNING -> RUNNING_P2P -> STOP For drivers that are unable to support RUNNING_P2P the core code silently merges RUNNING_P2P and RUNNING together. Unless driver support is present, the new state cannot be used in SET_STATE. Drivers that support this will be required to implement 4 FSM arcs beyond the basic FSM. 2 of the basic FSM arcs become combination transitions. Compared to the v1 clarification, NDMA is redefined into FSM states and is described in terms of the desired P2P quiescent behavior, noting that halting all DMA is an acceptable implementation. Link: https://lore.kernel.org/all/20220224142024.147653-11-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/vfio.h | 1 + include/uapi/linux/vfio.h | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index c44e80bbbd3b..66dda06ec42d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -33,6 +33,7 @@ struct vfio_device { struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; + unsigned int migration_flags; /* Members below here are private, not for driver use */ refcount_t refcount; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 22ed358c04c5..26a66f68371d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1011,10 +1011,16 @@ struct vfio_device_feature { * * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and * RESUMING are supported. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P + * is supported in addition to the STOP_COPY states. 
+ * + * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) +#define VFIO_MIGRATION_P2P (1 << 1) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -1065,10 +1071,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * + * And 1 optional state to support VFIO_MIGRATION_P2P: + * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: * - * RUNNING -> STOP + * RUNNING_P2P -> STOP * STOP_COPY -> STOP * While in STOP the device must stop the operation of the device. The device * must not generate interrupts, DMA, or any other change to external state. @@ -1095,11 +1104,16 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. * - * STOP -> RUNNING + * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * RUNNING -> RUNNING_P2P + * STOP -> RUNNING_P2P + * While in RUNNING_P2P the device is partially running in the P2P quiescent + * state defined below. + * * STOP -> STOP_COPY * This arc begin the process of saving the device state and will return a * new data_fd. @@ -1129,6 +1143,18 @@ struct vfio_device_feature_mig_state { * To recover from ERROR VFIO_DEVICE_RESET must be used to return the * device_state back to RUNNING. * + * The optional peer to peer (P2P) quiescent state is intended to be a quiescent + * state for the device for the purposes of managing multiple devices within a + * user context where peer-to-peer DMA between devices may be active. The + * RUNNING_P2P states must prevent the device from initiating + * any new P2P DMA transactions. If the device can identify P2P transactions + * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration + * driver must complete any such outstanding operations prior to completing the + * FSM arc into a P2P state. For the purpose of specification the states + * behave as though the device was fully running if not supported. Like while in + * STOP or STOP_COPY the user must not touch the device, otherwise the state + * can be exited. + * * The remaining possible transitions are interpreted as combinations of the * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: @@ -1141,6 +1167,11 @@ struct vfio_device_feature_mig_state { * fails. When handling these types of errors users should anticipate future * revisions of this protocol using new states and those states becoming * visible in this case. + * + * The optional states cannot be used with SET_STATE if the device does not + * support them. The user can discover if these states are supported by using + * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can + * avoid knowing about these optional states if the kernel driver supports them. 
*/ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -1148,6 +1179,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING = 2, VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, + VFIO_DEVICE_STATE_RUNNING_P2P = 5, }; /* -------- API for Type1 VFIO IOMMU -------- */ -- cgit v1.2.3 From 0f3f9cd7f752f6e685e378620babc5e34af6fb9f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:20 +0200 Subject: vfio: Remove migration protocol v1 documentation v1 was never implemented and is replaced by v2. The old uAPI documentation is removed from the header file. The old uAPI definitions are still kept in the header file to ease transition for userspace copying these headers. They will be fully removed down the road. Link: https://lore.kernel.org/all/20220224142024.147653-12-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 200 +--------------------------------------------- 1 file changed, 2 insertions(+), 198 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 26a66f68371d..fea86061b44e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -323,7 +323,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) -#define VFIO_REGION_TYPE_MIGRATION (3) +#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -405,203 +405,7 @@ struct vfio_region_gfx_edid { #define VFIO_REGION_SUBTYPE_CCW_CRW (3) /* sub-types for VFIO_REGION_TYPE_MIGRATION */ -#define VFIO_REGION_SUBTYPE_MIGRATION (1) - -/* - * The structure vfio_device_migration_info is placed at the 0th offset of - * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related - * migration information. Field accesses from this structure are only supported - * at their native width and alignment. Otherwise, the result is undefined and - * vendor drivers should return an error. - * - * device_state: (read/write) - * - The user application writes to this field to inform the vendor driver - * about the device state to be transitioned to. - * - The vendor driver should take the necessary actions to change the - * device state. After successful transition to a given state, the - * vendor driver should return success on write(device_state, state) - * system call. If the device state transition fails, the vendor driver - * should return an appropriate -errno for the fault condition. - * - On the user application side, if the device state transition fails, - * that is, if write(device_state, state) returns an error, read - * device_state again to determine the current state of the device from - * the vendor driver. - * - The vendor driver should return previous state of the device unless - * the vendor driver has encountered an internal error, in which case - * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. - * - The user application must use the device reset ioctl to recover the - * device from VFIO_DEVICE_STATE_ERROR state. If the device is - * indicated to be in a valid device state by reading device_state, the - * user application may attempt to transition the device to any valid - * state reachable from the current state or terminate itself. 
- * - * device_state consists of 3 bits: - * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, - * it indicates the _STOP state. When the device state is changed to - * _STOP, driver should stop the device before write() returns. - * - If bit 1 is set, it indicates the _SAVING state, which means that the - * driver should start gathering device state information that will be - * provided to the VFIO user application to save the device's state. - * - If bit 2 is set, it indicates the _RESUMING state, which means that - * the driver should prepare to resume the device. Data provided through - * the migration region should be used to resume the device. - * Bits 3 - 31 are reserved for future use. To preserve them, the user - * application should perform a read-modify-write operation on this - * field when modifying the specified bits. - * - * +------- _RESUMING - * |+------ _SAVING - * ||+----- _RUNNING - * ||| - * 000b => Device Stopped, not saving or resuming - * 001b => Device running, which is the default state - * 010b => Stop the device & save the device state, stop-and-copy state - * 011b => Device running and save the device state, pre-copy state - * 100b => Device stopped and the device state is resuming - * 101b => Invalid state - * 110b => Error state - * 111b => Invalid state - * - * State transitions: - * - * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP - * (100b) (001b) (011b) (010b) (000b) - * 0. Running or default state - * | - * - * 1. Normal Shutdown (optional) - * |------------------------------------->| - * - * 2. Save the state or suspend - * |------------------------->|---------->| - * - * 3. Save the state during live migration - * |----------->|------------>|---------->| - * - * 4. Resuming - * |<---------| - * - * 5. Resumed - * |--------->| - * - * 0. Default state of VFIO device is _RUNNING when the user application starts. - * 1. During normal shutdown of the user application, the user application may - * optionally change the VFIO device state from _RUNNING to _STOP. This - * transition is optional. The vendor driver must support this transition but - * must not require it. - * 2. When the user application saves state or suspends the application, the - * device state transitions from _RUNNING to stop-and-copy and then to _STOP. - * On state transition from _RUNNING to stop-and-copy, driver must stop the - * device, save the device state and send it to the application through the - * migration region. The sequence to be followed for such transition is given - * below. - * 3. In live migration of user application, the state transitions from _RUNNING - * to pre-copy, to stop-and-copy, and to _STOP. - * On state transition from _RUNNING to pre-copy, the driver should start - * gathering the device state while the application is still running and send - * the device state data to application through the migration region. - * On state transition from pre-copy to stop-and-copy, the driver must stop - * the device, save the device state and send it to the user application - * through the migration region. - * Vendor drivers must support the pre-copy state even for implementations - * where no data is provided to the user before the stop-and-copy state. The - * user must not be required to consume all migration data before the device - * transitions to a new state, including the stop-and-copy state. - * The sequence to be followed for above two transitions is given below. - * 4. 
To start the resuming phase, the device state should be transitioned from - * the _RUNNING to the _RESUMING state. - * In the _RESUMING state, the driver should use the device state data - * received through the migration region to resume the device. - * 5. After providing saved device data to the driver, the application should - * change the state from _RESUMING to _RUNNING. - * - * reserved: - * Reads on this field return zero and writes are ignored. - * - * pending_bytes: (read only) - * The number of pending bytes still to be migrated from the vendor driver. - * - * data_offset: (read only) - * The user application should read data_offset field from the migration - * region. The user application should read the device data from this - * offset within the migration region during the _SAVING state or write - * the device data during the _RESUMING state. See below for details of - * sequence to be followed. - * - * data_size: (read/write) - * The user application should read data_size to get the size in bytes of - * the data copied in the migration region during the _SAVING state and - * write the size in bytes of the data copied in the migration region - * during the _RESUMING state. - * - * The format of the migration region is as follows: - * ------------------------------------------------------------------ - * |vfio_device_migration_info| data section | - * | | /////////////////////////////// | - * ------------------------------------------------------------------ - * ^ ^ - * offset 0-trapped part data_offset - * - * The structure vfio_device_migration_info is always followed by the data - * section in the region, so data_offset will always be nonzero. The offset - * from where the data is copied is decided by the kernel driver. The data - * section can be trapped, mmapped, or partitioned, depending on how the kernel - * driver defines the data section. The data section partition can be defined - * as mapped by the sparse mmap capability. If mmapped, data_offset must be - * page aligned, whereas initial section which contains the - * vfio_device_migration_info structure, might not end at the offset, which is - * page aligned. The user is not required to access through mmap regardless - * of the capabilities of the region mmap. - * The vendor driver should determine whether and how to partition the data - * section. The vendor driver should return data_offset accordingly. - * - * The sequence to be followed while in pre-copy state and stop-and-copy state - * is as follows: - * a. Read pending_bytes, indicating the start of a new iteration to get device - * data. Repeated read on pending_bytes at this stage should have no side - * effects. - * If pending_bytes == 0, the user application should not iterate to get data - * for that device. - * If pending_bytes > 0, perform the following steps. - * b. Read data_offset, indicating that the vendor driver should make data - * available through the data section. The vendor driver should return this - * read operation only after data is available from (region + data_offset) - * to (region + data_offset + data_size). - * c. Read data_size, which is the amount of data in bytes available through - * the migration region. - * Read on data_offset and data_size should return the offset and size of - * the current buffer if the user application reads data_offset and - * data_size more than once here. - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * e. Process the data. - * f. 
Read pending_bytes, which indicates that the data from the previous - * iteration has been read. If pending_bytes > 0, go to step b. - * - * The user application can transition from the _SAVING|_RUNNING - * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the - * number of pending bytes. The user application should iterate in _SAVING - * (stop-and-copy) until pending_bytes is 0. - * - * The sequence to be followed while _RESUMING device state is as follows: - * While data for this device is available, repeat the following steps: - * a. Read data_offset from where the user application should write data. - * b. Write migration data starting at the migration region + data_offset for - * the length determined by data_size from the migration source. - * c. Write data_size, which indicates to the vendor driver that data is - * written in the migration region. Vendor driver must return this write - * operations on consuming data. Vendor driver should apply the - * user-provided migration region data to the device resume state. - * - * If an error occurs during the above sequences, the vendor driver can return - * an error code for next read() or write() operation, which will terminate the - * loop. The user application should then take the next necessary action, for - * example, failing migration or terminating the user application. - * - * For the user application, data is opaque. The user application should write - * data in the same order as the data is received and the data should be of - * same transaction size at the source. - */ +#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1) struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -- cgit v1.2.3 From 915076f70efad04bf6fc843970a3c7763487011a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 24 Feb 2022 16:20:23 +0200 Subject: vfio/pci: Expose vfio_pci_core_aer_err_detected() Expose vfio_pci_core_aer_err_detected() to be used by drivers as part of their pci_error_handlers structure. Next patch for mlx5 driver will use it. Link: https://lore.kernel.org/all/20220224142024.147653-15-yishaih@nvidia.com Reviewed-by: Alex Williamson Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/vfio_pci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index beba0b2ed87d..9f1bf8e49d43 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -232,6 +232,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); +pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); static inline bool vfio_pci_is_vga(struct pci_dev *pdev) { -- cgit v1.2.3 From 3f8bab174cb26aa5a8053c4457cc733881e3ad88 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 3 Mar 2022 09:08:31 +0100 Subject: serial: make uart_console_write->putchar()'s character an unsigned char MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, uart_console_write->putchar's second parameter (the character) is of type int. It makes little sense, provided uart_console_write() accepts the input string as "const char *s" and passes its content -- the characters -- to putchar(). So switch the character's type to unsigned char. 
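For illustration, a console callback pair with the new prototype looks roughly like the sketch below; the foo_* registers and port lookup are invented, not taken from any real driver.

/* Illustrative sketch only: all foo_* names are made up. */
static void foo_console_putchar(struct uart_port *port, unsigned char ch)
{
	while (!(readl(port->membase + FOO_STAT) & FOO_TX_EMPTY))
		cpu_relax();
	writel(ch, port->membase + FOO_TX_FIFO);
}

static void foo_console_write(struct console *co, const char *s,
			      unsigned int count)
{
	struct uart_port *port = &foo_ports[co->index];

	uart_console_write(port, s, count, foo_console_putchar);
}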
We don't use char as that is signed on some platforms. That would cause troubles for drivers which (implicitly) cast the char to u16 when writing to the device. Sign extension would happen in that case and the value written would be completely different to the provided char. DZ is an example of such a driver -- on MIPS, it uses u16 for dz_out in dz_console_putchar(). Note we do the char -> uchar conversion implicitly in uart_console_write(). Provided we do not change size of the data type, sign extension does not happen there, so the problem is void. This makes the types consistent and unified with the rest of the uart layer, which uses unsigned char in most places already. One exception is xmit_buf, but that is going to be converted later. Cc: Paul Cercueil Cc: Tobias Klauser Cc: Russell King Cc: Vineet Gupta Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: Florian Fainelli Cc: bcm-kernel-feedback-list@broadcom.com Cc: Alexander Shiyan Cc: Baruch Siach Cc: "Maciej W. Rozycki" Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: Karol Gugala Cc: Mateusz Holenko Cc: Vladimir Zapolskiy Cc: Neil Armstrong Cc: Kevin Hilman Cc: Jerome Brunet Cc: Martin Blumenstingl Cc: Taichi Sugaya Cc: Takao Orito Cc: Liviu Dudau Cc: Sudeep Holla Cc: Lorenzo Pieralisi Cc: "Andreas Färber" Cc: Manivannan Sadhasivam Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Andy Gross Cc: Bjorn Andersson Cc: Krzysztof Kozlowski Cc: Orson Zhai Cc: Baolin Wang Cc: Chunyan Zhang Cc: Patrice Chotard Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: "David S. Miller" Cc: Peter Korsgaard Cc: Michal Simek Acked-by: Richard Genoud [atmel_serial] Acked-by: Uwe Kleine-König Acked-by: Paul Cercueil Acked-by: Neil Armstrong # meson_serial Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220303080831.21783-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 31f7fe527395..14ae35f68abb 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -399,7 +399,7 @@ int uart_set_options(struct uart_port *port, struct console *co, int baud, struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, - void (*putchar)(struct uart_port *, int)); + void (*putchar)(struct uart_port *, unsigned char)); /* * Port/driver registration/removal -- cgit v1.2.3 From f9cef64fa23f6c1ff177be5082113c5a94e34e5d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Mar 2022 21:14:13 +0200 Subject: net: dsa: felix: migrate host FDB and MDB entries when changing tag proto The "ocelot" and "ocelot-8021q" tagging protocols make use of different hardware resources, and host FDB entries have different destination ports in the switch analyzer module, practically speaking. So when the user requests a tagging protocol change, the driver must migrate all host FDB and MDB entries from the NPI port (in fact CPU port module) towards the same physical port, but this time used as a regular port. It is pointless for the felix driver to keep a copy of the host addresses, when we can create and export DSA helpers for walking through the addresses that it already needs to keep on the CPU port, for refcounting purposes. 
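A rough sketch of how a driver could consume such a walk helper follows; this is not the felix implementation, and the my_* callback and re-install function are hypothetical.

/* Hypothetical use of dsa_port_walk_fdbs(); my_* names are invented. */
static int my_fdb_migrate_cb(struct dsa_switch *ds, int port,
			     const unsigned char *addr, u16 vid,
			     struct dsa_db db)
{
	/* Re-install the host address for the newly selected tag protocol */
	return my_host_fdb_add(ds, port, addr, vid, db);
}

static int my_migrate_host_fdbs(struct dsa_switch *ds, int cpu_port)
{
	return dsa_port_walk_fdbs(ds, cpu_port, my_fdb_migrate_cb);
}

Host MDB entries would be migrated symmetrically via dsa_port_walk_mdbs().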
felix_classify_db() is moved up to avoid a forward declaration. We pass "bool change" because dp->fdbs and dp->mdbs are uninitialized lists when felix_setup() first calls felix_set_tag_protocol(), so we need to avoid calling dsa_port_walk_fdbs() during probe time. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index cfedcfb86350..71cc363dbbd4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1219,6 +1219,13 @@ struct dsa_switch_driver { struct net_device *dsa_dev_to_net_device(struct device *dev); +typedef int dsa_fdb_walk_cb_t(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); + +int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); +int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); + /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { -- cgit v1.2.3 From a1ac9c8acec1605c6b43af418f79facafdced680 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:25 -0800 Subject: net: Add skb->mono_delivery_time to distinguish mono delivery_time from (rcv) timestamp skb->tstamp was first used as the (rcv) timestamp. The major usage is to report it to the user (e.g. SO_TIMESTAMP). Later, skb->tstamp is also set as the (future) delivery_time (e.g. EDT in TCP) during egress and used by the qdisc (e.g. sch_fq) to make decision on when the skb can be passed to the dev. Currently, there is no way to tell skb->tstamp having the (rcv) timestamp or the delivery_time, so it is always reset to 0 whenever forwarded between egress and ingress. While it makes sense to always clear the (rcv) timestamp in skb->tstamp to avoid confusing sch_fq that expects the delivery_time, it is a performance issue [0] to clear the delivery_time if the skb finally egress to a fq@phy-dev. For example, when forwarding from egress to ingress and then finally back to egress: tcp-sender => veth@netns => veth@hostns => fq@eth0@hostns ^ ^ reset rest This patch adds one bit skb->mono_delivery_time to flag the skb->tstamp is storing the mono delivery_time (EDT) instead of the (rcv) timestamp. The current use case is to keep the TCP mono delivery_time (EDT) and to be used with sch_fq. A latter patch will also allow tc-bpf@ingress to read and change the mono delivery_time. In the future, another bit (e.g. skb->user_delivery_time) can be added for the SCM_TXTIME where the clock base is tracked by sk->sk_clockid. [ This patch is a prep work. The following patches will get the other parts of the stack ready first. Then another patch after that will finally set the skb->mono_delivery_time. ] skb_set_delivery_time() function is added. It is used by the tcp_output.c and during ip[6] fragmentation to assign the delivery_time to the skb->tstamp and also set the skb->mono_delivery_time. A note on the change in ip_send_unicast_reply() in ip_output.c. It is only used by TCP to send reset/ack out of a ctl_sk. Like the new skb_set_delivery_time(), this patch sets the skb->mono_delivery_time to 0 for now as a place holder. It will be enabled in a latter patch. A similar case in tcp_ipv6 can be done with skb_set_delivery_time() in tcp_v6_send_response(). 
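A minimal sketch of the new helper in use (illustrative only; note that at this point in the series skb_set_delivery_time() still forces the mono bit to 0, see the hunk below):

/* Illustrative only: stamping an EDT vs. a (rcv) timestamp. */
static void example_stamp_edt(struct sk_buff *skb, u64 edt_ns)
{
	/* tstamp now carries a mono delivery_time (EDT), not a (rcv) time */
	skb_set_delivery_time(skb, ns_to_ktime(edt_ns), true);
}

static void example_stamp_rcv(struct sk_buff *skb)
{
	/* mono == false: tstamp is a plain receive timestamp */
	skb_set_delivery_time(skb, ktime_get_real(), false);
}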
[0] (slide 22): https://linuxplumbersconf.org/event/11/contributions/953/attachments/867/1658/LPC_2021_BPF_Datapath_Extensions.pdf Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d67941f78b92..803ffa63dea6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -795,6 +795,10 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required + * @mono_delivery_time: When set, skb->tstamp has the + * delivery_time in mono clock base (i.e. EDT). Otherwise, the + * skb->tstamp has the (rcv) timestamp at ingress and + * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking @@ -965,6 +969,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; + __u8 mono_delivery_time:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -3983,6 +3988,14 @@ static inline ktime_t net_timedelta(ktime_t t) return ktime_sub(ktime_get_real(), t); } +static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, + bool mono) +{ + skb->tstamp = kt; + /* Setting mono_delivery_time will be enabled later */ + skb->mono_delivery_time = 0; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; -- cgit v1.2.3 From de799101519aad23c6096041ba2744d7b5517e6a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:31 -0800 Subject: net: Add skb_clear_tstamp() to keep the mono delivery_time Right now, skb->tstamp is reset to 0 whenever the skb is forwarded. If skb->tstamp has the mono delivery_time, clearing it can hurt the performance when it finally transmits out to fq@phy-dev. The earlier patch added a skb->mono_delivery_time bit to flag the skb->tstamp carrying the mono delivery_time. This patch adds skb_clear_tstamp() helper which keeps the mono delivery_time and clears everything else. The delivery_time clearing will be postponed until the stack knows the skb will be delivered locally. It will be done in a latter patch. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 803ffa63dea6..27a28920e7b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3996,6 +3996,14 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, skb->mono_delivery_time = 0; } +static inline void skb_clear_tstamp(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return; + + skb->tstamp = 0; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; @@ -4852,7 +4860,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) - skb->tstamp = 0; + skb_clear_tstamp(skb); #endif } -- cgit v1.2.3 From 27942a15209f564ed8ee2a9e126cb7b105181355 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:38 -0800 Subject: net: Handle delivery_time in skb->tstamp during network tapping with af_packet A latter patch will set the skb->mono_delivery_time to flag the skb->tstamp is used as the mono delivery_time (EDT) instead of the (rcv) timestamp. skb_clear_tstamp() will then keep this delivery_time during forwarding. This patch is to make the network tapping (with af_packet) to handle the delivery_time stored in skb->tstamp. Regardless of tapping at the ingress or egress, the tapped skb is received by the af_packet socket, so it is ingress to the af_packet socket and it expects the (rcv) timestamp. When tapping at egress, dev_queue_xmit_nit() is used. It has already expected skb->tstamp may have delivery_time, so it does skb_clone()+net_timestamp_set() to ensure the cloned skb has the (rcv) timestamp before passing to the af_packet sk. This patch only adds to clear the skb->mono_delivery_time bit in net_timestamp_set(). When tapping at ingress, it currently expects the skb->tstamp is either 0 or the (rcv) timestamp. Meaning, the tapping at ingress path has already expected the skb->tstamp could be 0 and it will get the (rcv) timestamp by ktime_get_real() when needed. There are two cases for tapping at ingress: One case is af_packet queues the skb to its sk_receive_queue. The skb is either not shared or new clone created. The newly added skb_clear_delivery_time() is called to clear the delivery_time (if any) and set the (rcv) timestamp if needed before the skb is queued to the sk_receive_queue. Another case, the ingress skb is directly copied to the rx_ring and tpacket_get_timestamp() is used to get the (rcv) timestamp. The newly added skb_tstamp() is used in tpacket_get_timestamp() to check the skb->mono_delivery_time bit before returning skb->tstamp. As mentioned earlier, the tapping@ingress has already expected the skb may not have the (rcv) timestamp (because no sk has asked for it) and has handled this case by directly calling ktime_get_real(). Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27a28920e7b3..7e2d796ece80 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3996,6 +3996,22 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, skb->mono_delivery_time = 0; } +DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); + +/* It is used in the ingress path to clear the delivery_time. + * If needed, set the skb->tstamp to the (rcv) timestamp. + */ +static inline void skb_clear_delivery_time(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) { + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = ktime_get_real(); + else + skb->tstamp = 0; + } +} + static inline void skb_clear_tstamp(struct sk_buff *skb) { if (skb->mono_delivery_time) @@ -4004,6 +4020,14 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) skb->tstamp = 0; } +static inline ktime_t skb_tstamp(const struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return 0; + + return skb->tstamp; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; -- cgit v1.2.3 From d93376f503c7a586707925957592c0f16f4db0b1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:44 -0800 Subject: net: Clear mono_delivery_time bit in __skb_tstamp_tx() In __skb_tstamp_tx(), it may clone the egress skb and queues the clone to the sk_error_queue. The outgoing skb may have the mono delivery_time while the (rcv) timestamp is expected for the clone, so the skb->mono_delivery_time bit needs to be cleared from the clone. This patch adds the skb->mono_delivery_time clearing to the existing __net_timestamp() and use it in __skb_tstamp_tx(). The __net_timestamp() fast path usage in dev.c is changed to directly call ktime_get_real() since the mono_delivery_time bit is not set at that point. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7e2d796ece80..8e8a4af4f9e2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3981,6 +3981,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); + skb->mono_delivery_time = 0; } static inline ktime_t net_timedelta(ktime_t t) -- cgit v1.2.3 From d98d58a002619b5c165f1eedcd731e2fe2c19088 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:50 -0800 Subject: net: Set skb->mono_delivery_time and clear it after sch_handle_ingress() The previous patches handled the delivery_time before sch_handle_ingress(). This patch can now set the skb->mono_delivery_time to flag the skb->tstamp is used as the mono delivery_time (EDT) instead of the (rcv) timestamp and also clear it with skb_clear_delivery_time() after sch_handle_ingress(). This will make the bpf_redirect_*() to keep the mono delivery_time and used by a qdisc (fq) of the egress-ing interface. A latter patch will postpone the skb_clear_delivery_time() until the stack learns that the skb is being delivered locally and that will make other kernel forwarding paths (ip[6]_forward) able to keep the delivery_time also. 
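Conceptually the clearing point looks like the sketch below; this is not the actual dev.c hunk, just an illustration of where skb_clear_delivery_time() fits.

/*
 * Conceptual sketch only.  Once the stack knows the skb is being
 * delivered locally, the EDT in skb->tstamp is dropped and, if
 * timestamping is enabled, replaced by a (rcv) timestamp.
 */
static void example_deliver_locally(struct sk_buff *skb)
{
	skb_clear_delivery_time(skb);
	/* ... hand the skb to the upper layer protocol handlers ... */
}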
Thus, like the previous patches on using the skb->mono_delivery_time bit, calling skb_clear_delivery_time() is not limited within the CONFIG_NET_INGRESS to avoid too many code churns among this set. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8e8a4af4f9e2..0f5fd53059cd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3993,8 +3993,7 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, bool mono) { skb->tstamp = kt; - /* Setting mono_delivery_time will be enabled later */ - skb->mono_delivery_time = 0; + skb->mono_delivery_time = kt && mono; } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); -- cgit v1.2.3 From 8672406eb5d77333ca14e9612e3166704b367c40 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:57 -0800 Subject: net: ip: Handle delivery_time in ip defrag A latter patch will postpone the delivery_time clearing until the stack knows the skb is being delivered locally. That will allow other kernel forwarding path (e.g. ip[6]_forward) to keep the delivery_time also. An earlier attempt was to do skb_clear_delivery_time() in ip_local_deliver() and ip6_input(). The discussion [0] requested to move it one step later into ip_local_deliver_finish() and ip6_input_finish() so that the delivery_time can be kept for the ip_vs forwarding path also. To do that, this patch also needs to take care of the (rcv) timestamp usecase in ip_is_fragment(). It needs to expect delivery_time in the skb->tstamp, so it needs to save the mono_delivery_time bit in inet_frag_queue such that the delivery_time (if any) can be restored in the final defragmented skb. [Note that it will only happen when the locally generated skb is looping from egress to ingress over a virtual interface (e.g. veth, loopback...), skb->tstamp may have the delivery time before it is known that it will be delivered locally and received by another sk.] [0]: https://lore.kernel.org/netdev/ca728d81-80e8-3767-d5e-d44f6ad96e43@ssi.bg/ Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 63540be0fc34..911ad930867d 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -70,6 +70,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far + * @mono_delivery_time: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -90,6 +91,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; + u8 mono_delivery_time; __u8 flags; u16 max_size; struct fqdir *fqdir; -- cgit v1.2.3 From b6561f8491ca899e5a08311796085c9738d631ae Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:09 -0800 Subject: net: ipv6: Get rcv timestamp if needed when handling hop-by-hop IOAM option IOAM is a hop-by-hop option with a temporary iana allocation (49). Since it is hop-by-hop, it is done before the input routing decision. One of the traced data field is the (rcv) timestamp. When the locally generated skb is looping from egress to ingress over a virtual interface (e.g. 
veth, loopback...), skb->tstamp may have the delivery time before it is known that it will be delivered locally and received by another sk. Like handling the network tapping (tcpdump) in the earlier patch, this patch gets the timestamp if needed without over-writing the delivery_time in the skb->tstamp. skb_tstamp_cond() is added to do the ktime_get_real() with an extra cond arg to check on top of the netstamp_needed_key static key. skb_tstamp_cond() will also be used in a latter patch and it needs the netstamp_needed_key check. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0f5fd53059cd..4b5b926a81f2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4028,6 +4028,17 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) return skb->tstamp; } +static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) +{ + if (!skb->mono_delivery_time && skb->tstamp) + return skb->tstamp; + + if (static_branch_unlikely(&netstamp_needed_key) || cond) + return ktime_get_real(); + + return 0; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; -- cgit v1.2.3 From 7449197d600d30d038b1ce6285ab4747096eac7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:28 -0800 Subject: bpf: Keep the (rcv) timestamp behavior for the existing tc-bpf@ingress The current tc-bpf@ingress reads and writes the __sk_buff->tstamp as a (rcv) timestamp which currently could either be 0 (not available) or ktime_get_real(). This patch is to backward compatible with the (rcv) timestamp expectation at ingress. If the skb->tstamp has the delivery_time, the bpf insn rewrite will read 0 for tc-bpf running at ingress as it is not available. When writing at ingress, it will also clear the skb->mono_delivery_time bit. /* BPF_READ: a = __sk_buff->tstamp */ if (!skb->tc_at_ingress || !skb->mono_delivery_time) a = skb->tstamp; else a = 0 /* BPF_WRITE: __sk_buff->tstamp = a */ if (skb->tc_at_ingress) skb->mono_delivery_time = 0; skb->tstamp = a; [ A note on the BPF_CGROUP_INET_INGRESS which can also access skb->tstamp. At that point, the skb is delivered locally and skb_clear_delivery_time() has already been done, so the skb->tstamp will only have the (rcv) timestamp. ] If the tc-bpf@egress writes 0 to skb->tstamp, the skb->mono_delivery_time has to be cleared also. It could be done together during convert_ctx_access(). However, the latter patch will also expose the skb->mono_delivery_time bit as __sk_buff->delivery_time_type. Changing the delivery_time_type in the background may surprise the user, e.g. the 2nd read on __sk_buff->delivery_time_type may need a READ_ONCE() to avoid compiler optimization. Thus, in expecting the needs in the latter patch, this patch does a check on !skb->tstamp after running the tc-bpf and clears the skb->mono_delivery_time bit if needed. The earlier discussion on v4 [0]. The bpf insn rewrite requires the skb's mono_delivery_time bit and tc_at_ingress bit. They are moved up in sk_buff so that bpf rewrite can be done at a fixed offset. tc_skip_classify is moved together with tc_at_ingress. To get one bit for mono_delivery_time, csum_not_inet is moved down and this bit is currently used by sctp. 
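A tiny tc-bpf sketch of this backward-compatible behaviour (a hypothetical program, not part of the series):

/*
 * Hypothetical tc-bpf program.  At ingress a forwarded mono
 * delivery_time is hidden: reading __sk_buff->tstamp yields 0, and a
 * write also clears skb->mono_delivery_time.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int tstamp_at_ingress(struct __sk_buff *skb)
{
	__u64 rcv_tstamp = skb->tstamp;	/* 0 if only an EDT is present */

	/* Writing at ingress also clears the mono_delivery_time bit */
	skb->tstamp = rcv_tstamp;

	return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";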
[0]: https://lore.kernel.org/bpf/20220217015043.khqwqklx45c4m4se@kafai-mbp.dhcp.thefacebook.com/ Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b5b926a81f2..5445860e1ba6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,8 +941,12 @@ struct sk_buff { __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 csum_not_inet:1; __u8 dst_pending_confirm:1; + __u8 mono_delivery_time:1; +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; + __u8 tc_at_ingress:1; +#endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -953,10 +957,6 @@ struct sk_buff { #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; -#endif -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; #endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT @@ -969,7 +969,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; - __u8 mono_delivery_time:1; + __u8 csum_not_inet:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -1047,10 +1047,16 @@ struct sk_buff { /* if you move pkt_vlan_present around you also must adapt these constants */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_VLAN_PRESENT_BIT 7 +#define TC_AT_INGRESS_MASK (1 << 0) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else #define PKT_VLAN_PRESENT_BIT 0 +#define TC_AT_INGRESS_MASK (1 << 7) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define TC_AT_INGRESS_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define SKB_MONO_DELIVERY_TIME_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) #ifdef __KERNEL__ /* -- cgit v1.2.3 From 8d21ec0e46ed6e39994accff8eb4f2be3d2e76b5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:34 -0800 Subject: bpf: Add __sk_buff->delivery_time_type and bpf_skb_set_skb_delivery_time() * __sk_buff->delivery_time_type: This patch adds __sk_buff->delivery_time_type. It tells if the delivery_time is stored in __sk_buff->tstamp or not. It will be most useful for ingress to tell if the __sk_buff->tstamp has the (rcv) timestamp or delivery_time. If delivery_time_type is 0 (BPF_SKB_DELIVERY_TIME_NONE), it has the (rcv) timestamp. Two non-zero types are defined for the delivery_time_type, BPF_SKB_DELIVERY_TIME_MONO and BPF_SKB_DELIVERY_TIME_UNSPEC. For UNSPEC, it can only happen in egress because only mono delivery_time can be forwarded to ingress now. The clock of UNSPEC delivery_time can be deduced from the skb->sk->sk_clockid which is how the sch_etf doing it also. * Provide forwarded delivery_time to tc-bpf@ingress: With the help of the new delivery_time_type, the tc-bpf has a way to tell if the __sk_buff->tstamp has the (rcv) timestamp or the delivery_time. During bpf load time, the verifier will learn if the bpf prog has accessed the new __sk_buff->delivery_time_type. If it does, it means the tc-bpf@ingress is expecting the skb->tstamp could have the delivery_time. The kernel will then read the skb->tstamp as-is during bpf insn rewrite without checking the skb->mono_delivery_time. This is done by adding a new prog->delivery_time_access bit. The same goes for writing skb->tstamp. 
* bpf_skb_set_delivery_time(): The bpf_skb_set_delivery_time() helper is added to allow setting both delivery_time and the delivery_time_type at the same time. If the tc-bpf does not need to change the delivery_time_type, it can directly write to the __sk_buff->tstamp as the existing tc-bpf has already been doing. It will be most useful at ingress to change the __sk_buff->tstamp from the (rcv) timestamp to a mono delivery_time and then bpf_redirect_*(). bpf only has mono clock helper (bpf_ktime_get_ns), and the current known use case is the mono EDT for fq, and only mono delivery time can be kept during forward now, so bpf_skb_set_delivery_time() only supports setting BPF_SKB_DELIVERY_TIME_MONO. It can be extended later when use cases come up and the forwarding path also supports other clock bases. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- include/uapi/linux/bpf.h | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1cb1af917617..9bf26307247f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -572,7 +572,8 @@ struct bpf_prog { has_callchain_buf:1, /* callchain buffer allocated? */ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1; /* Do we call get_func_ip() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + delivery_time_access:1; /* Accessed __sk_buff->delivery_time_type */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index afe3d0d7f5f2..4eebea830613 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5086,6 +5086,37 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. + * + * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * Description + * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also + * change the __sk_buff->delivery_time_type to *dtime_type*. + * + * When setting a delivery time (non zero *dtime*) to + * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* + * is supported. It is the only delivery_time_type that will be + * kept after bpf_redirect_*(). + * + * If there is no need to change the __sk_buff->delivery_time_type, + * the delivery time can be directly written to __sk_buff->tstamp + * instead. + * + * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE + * can be used to clear any delivery time stored in + * __sk_buff->tstamp. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. 
+ * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5280,6 +5311,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ + FN(skb_set_delivery_time), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5469,6 +5501,12 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_DELIVERY_TIME_NONE, + BPF_SKB_DELIVERY_TIME_UNSPEC, + BPF_SKB_DELIVERY_TIME_MONO, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -5509,7 +5547,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ + __u8 delivery_time_type; + __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 2d3916f3189172d5c69d33065c3c21119fe539fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Mar 2022 09:37:28 -0800 Subject: ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report() While investigating on why a synchronize_net() has been added recently in ipv6_mc_down(), I found that igmp6_event_query() and igmp6_event_report() might drop skbs in some cases. Discussion about removing synchronize_net() from ipv6_mc_down() will happen in a different thread. Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Cc: Cong Wang Cc: David Ahern Link: https://lore.kernel.org/r/20220303173728.937869-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ndisc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 53cb8de0e589..47ffb360ddfa 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -475,9 +475,9 @@ int igmp6_late_init(void); void igmp6_cleanup(void); void igmp6_late_cleanup(void); -int igmp6_event_query(struct sk_buff *skb); +void igmp6_event_query(struct sk_buff *skb); -int igmp6_event_report(struct sk_buff *skb); +void igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From 5c3f1f9cc4cbbf491233982b5975ae2d284de5df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: remove the __KERNEL__ guard from __KERNEL__ ifdefs don't make sense outside of include/uapi/. 
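For contrast, a __KERNEL__ guard still makes sense inside include/uapi/, where the header is also consumed by userspace; a hypothetical example (not from this patch):

/* include/uapi/linux/frob.h (hypothetical) */
#ifndef _UAPI_LINUX_FROB_H
#define _UAPI_LINUX_FROB_H

#define FROB_NAME_MAX	32	/* part of the user-visible ABI */

#ifdef __KERNEL__
/* Kernel-only detail; unifdef strips this from the exported header copy. */
#define FROB_INTERNAL_FLAG	0x80000000
#endif

#endif /* _UAPI_LINUX_FROB_H */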
Link: https://lkml.kernel.org/r/20220210072828.2930359-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Muchun Song Reviewed-by: Dan Williams Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 74ee50c2033b..2cca8cd30186 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3,9 +3,6 @@ #define _LINUX_MM_H #include - -#ifdef __KERNEL__ - #include #include #include @@ -3379,5 +3376,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 730ff52194cdb324b7680e5054c546f7b52de8a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: remove pointless includes from hmm.h pulls in the world for no good reason at all. Remove the includes and push a few ones into the users instead. Link: https://lkml.kernel.org/r/20220210072828.2930359-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Muchun Song Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Dan Williams Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/hmm.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 2fd2e91d5107..d5a6f101f843 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -9,14 +9,9 @@ #ifndef LINUX_HMM_H #define LINUX_HMM_H -#include -#include +#include -#include -#include -#include -#include -#include +struct mmu_interval_notifier; /* * On output: -- cgit v1.2.3 From 75e55d8a107edb2fd6e02b1fa8a81531209cda04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: move free_devmap_managed_page to memremap.c free_devmap_managed_page has nothing to do with the code in swap.c, move it to live with the rest of the code for devmap handling. 
Link: https://lkml.kernel.org/r/20220210072828.2930359-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Muchun Song Reviewed-by: Dan Williams Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2cca8cd30186..a9d6473fc045 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1092,7 +1092,6 @@ static inline bool is_zone_movable_page(const struct page *page) } #ifdef CONFIG_DEV_PAGEMAP_OPS -void free_devmap_managed_page(struct page *page); DECLARE_STATIC_KEY_FALSE(devmap_managed_key); static inline bool page_is_devmap_managed(struct page *page) -- cgit v1.2.3 From 895749455f6054e0c7b40a6ec449a3ab6db51bdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: simplify freeing of devmap managed pages Make put_devmap_managed_page return if it took charge of the page or not and remove the separate page_is_devmap_managed helper. Link: https://lkml.kernel.org/r/20220210072828.2930359-6-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a9d6473fc045..8a59f0456149 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1094,33 +1094,24 @@ static inline bool is_zone_movable_page(const struct page *page) #ifdef CONFIG_DEV_PAGEMAP_OPS DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -static inline bool page_is_devmap_managed(struct page *page) +bool __put_devmap_managed_page(struct page *page); +static inline bool put_devmap_managed_page(struct page *page) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; - switch (page->pgmap->type) { - case MEMORY_DEVICE_PRIVATE: - case MEMORY_DEVICE_FS_DAX: - return true; - default: - break; - } - return false; + if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && + page->pgmap->type != MEMORY_DEVICE_FS_DAX) + return false; + return __put_devmap_managed_page(page); } -void put_devmap_managed_page(struct page *page); - #else /* CONFIG_DEV_PAGEMAP_OPS */ -static inline bool page_is_devmap_managed(struct page *page) +static inline bool put_devmap_managed_page(struct page *page) { return false; } - -static inline void put_devmap_managed_page(struct page *page) -{ -} #endif /* CONFIG_DEV_PAGEMAP_OPS */ static inline bool is_device_private_page(const struct page *page) @@ -1220,16 +1211,11 @@ static inline void put_page(struct page *page) struct folio *folio = page_folio(page); /* - * For devmap managed pages we need to catch refcount transition from - * 2 to 1, when refcount reach one it means the 
page is free and we - * need to inform the device driver through callback. See - * include/linux/memremap.h and HMM for details. + * For some devmap managed pages we need to catch refcount transition + * from 2 to 1: */ - if (page_is_devmap_managed(&folio->page)) { - put_devmap_managed_page(&folio->page); + if (put_devmap_managed_page(&folio->page)) return; - } - folio_put(folio); } -- cgit v1.2.3 From dc90f0846df4870b6cc8528c31e5c60f18fb68be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:36 +1100 Subject: mm: don't include in Move the check for the actual pgmap types that need the free at refcount one behavior into the out of line helper, and thus avoid the need to pull memremap.h into mm.h. Link: https://lkml.kernel.org/r/20220210072828.2930359-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Acked-by: Felix Kuehling Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/memremap.h | 18 ++++++++++++++++++ include/linux/mm.h | 20 -------------------- 2 files changed, 18 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 1fafcc38acba..514ab46f597e 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ + +#include #include #include #include @@ -129,6 +131,22 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) return 1 << pgmap->vmemmap_shift; } +static inline bool is_device_private_page(const struct page *page) +{ + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && + IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} + +static inline bool is_pci_p2pdma_page(const struct page *page) +{ + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && + IS_ENABLED(CONFIG_PCI_P2PDMA) && + is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a59f0456149..cb8bee88e70c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -1101,9 +1100,6 @@ static inline bool put_devmap_managed_page(struct page *page) return false; if (!is_zone_device_page(page)) return false; - if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && - page->pgmap->type != MEMORY_DEVICE_FS_DAX) - return false; return __put_devmap_managed_page(page); } @@ -1114,22 +1110,6 @@ static inline bool put_devmap_managed_page(struct page *page) } #endif /* CONFIG_DEV_PAGEMAP_OPS */ -static inline bool is_device_private_page(const struct page *page) -{ - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_DEVICE_PRIVATE) && - is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; -} - -static inline bool is_pci_p2pdma_page(const struct page *page) -{ - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_PCI_P2PDMA) && - is_zone_device_page(page) && - page->pgmap->type == 
MEMORY_DEVICE_PCI_P2PDMA; -} - /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) -- cgit v1.2.3 From 27674ef6c73f0c9096a9827dc5d6ba9fc7808422 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:36 +1100 Subject: mm: remove the extra ZONE_DEVICE struct page refcount ZONE_DEVICE struct pages have an extra reference count that complicates the code for put_page() and several places in the kernel that need to check the reference count to see that a page is not being used (gup, compaction, migration, etc.). Clean up the code so the reference count doesn't need to be treated specially for ZONE_DEVICE pages. Note that this excludes the special idle page wakeup for fsdax pages, which still happens at refcount 1. This is a separate issue and will be sorted out later. Given that only fsdax pages require the notifiacation when the refcount hits 1 now, the PAGEMAP_OPS Kconfig symbol can go away and be replaced with a FS_DAX check for this hook in the put_page fastpath. Based on an earlier patch from Ralph Campbell . Link: https://lkml.kernel.org/r/20220210072828.2930359-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Acked-by: Felix Kuehling Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Christian Knig Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/memremap.h | 12 +++++------- include/linux/mm.h | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 514ab46f597e..d6a114dd5ea8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -68,9 +68,9 @@ enum memory_type { struct dev_pagemap_ops { /* - * Called once the page refcount reaches 1. (ZONE_DEVICE pages never - * reach 0 refcount unless there is a refcount bug. This allows the - * device driver to implement its own memory management.) + * Called once the page refcount reaches 0. The reference count will be + * reset to one by the core code after the method is called to prepare + * for handing out the page again. 
*/ void (*page_free)(struct page *page); @@ -133,16 +133,14 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) static inline bool is_device_private_page(const struct page *page) { - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } static inline bool is_pci_p2pdma_page(const struct page *page) { - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_PCI_P2PDMA) && + return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } diff --git a/include/linux/mm.h b/include/linux/mm.h index cb8bee88e70c..0201d258c646 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1090,7 +1090,7 @@ static inline bool is_zone_movable_page(const struct page *page) return page_zonenum(page) == ZONE_MOVABLE; } -#ifdef CONFIG_DEV_PAGEMAP_OPS +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); bool __put_devmap_managed_page(struct page *page); @@ -1103,12 +1103,12 @@ static inline bool put_devmap_managed_page(struct page *page) return __put_devmap_managed_page(page); } -#else /* CONFIG_DEV_PAGEMAP_OPS */ +#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ static inline bool put_devmap_managed_page(struct page *page) { return false; } -#endif /* CONFIG_DEV_PAGEMAP_OPS */ +#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ -- cgit v1.2.3 From dc4e8c07e9e2f69387579c49caca26ba239f7270 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Sun, 27 Feb 2022 20:25:45 +0800 Subject: ACPI: APEI: explicit init of HEST and GHES in apci_init() From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage the estatus memory pool. On the other hand, ghes_init() relies on sdei_init() to detect the SDEI version and (un)register events. The dependencies are as follows: ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init() ghes_init() => sdei_init() HEST is not PCI-specific and initcall ordering is implicit and not well-defined within a level. Based on above, remove acpi_hest_init() from acpi_pci_root_init() and convert ghes_init() and sdei_init() from initcalls to explicit calls in the following order: acpi_hest_init() ghes_init() sdei_init() Signed-off-by: Shuai Xue Signed-off-by: Rafael J. 
Wysocki --- include/acpi/apei.h | 4 +++- include/linux/arm_sdei.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/apei.h b/include/acpi/apei.h index ece0a8af2bae..4e60dd73c3bb 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -27,14 +27,16 @@ extern int hest_disable; extern int erst_disable; #ifdef CONFIG_ACPI_APEI_GHES extern bool ghes_disable; +void __init ghes_init(void); #else #define ghes_disable 1 +static inline void ghes_init(void) { } #endif #ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); #else -static inline void acpi_hest_init(void) { return; } +static inline void acpi_hest_init(void) { } #endif int erst_write(const struct cper_record_header *record); diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 0a241c5c911d..14dc461b0e82 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); +void __init sdei_init(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } +static inline void sdei_init(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ -- cgit v1.2.3 From 27e932a31496f75b78ea41fd5ccadd0f75d8e8be Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Sun, 27 Feb 2022 20:25:46 +0800 Subject: ACPI: APEI: rename ghes_init() with an "acpi_" prefix ghes_init() sticks out in acpi_init() because it is the only functions without an "acpi_" prefix. Rename ghes_init with an "acpi_" prefix, then all looks fine. Signed-off-by: Shuai Xue Signed-off-by: Rafael J. Wysocki --- include/acpi/apei.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 4e60dd73c3bb..afaca3a075e8 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -27,10 +27,10 @@ extern int hest_disable; extern int erst_disable; #ifdef CONFIG_ACPI_APEI_GHES extern bool ghes_disable; -void __init ghes_init(void); +void __init acpi_ghes_init(void); #else #define ghes_disable 1 -static inline void ghes_init(void) { } +static inline void acpi_ghes_init(void) { } #endif #ifdef CONFIG_ACPI_APEI -- cgit v1.2.3 From f17bc788f7b97c36b8f3fbef14555a2a16ee3f69 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 13 Jan 2022 17:00:41 +0200 Subject: media: media-entity: Add media_pad_is_streaming() helper function Add a function to test if a pad is part of a pipeline currently streaming, and use it through drivers to replace direct access to the stream_count field. This will help reworking pipeline start/stop without disturbing drivers. 
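A typical driver-side use of the new helper might look as follows (a hypothetical subdev sketch; only media_entity_is_streaming() itself comes from the hunk below):

#include <linux/errno.h>
#include <media/media-entity.h>
#include <media/v4l2-subdev.h>

/* Hypothetical subdev op: refuse format changes while the entity streams. */
static int foo_set_fmt(struct v4l2_subdev *sd,
		       struct v4l2_subdev_state *state,
		       struct v4l2_subdev_format *fmt)
{
	if (media_entity_is_streaming(&sd->entity))
		return -EBUSY;

	/* ... apply the new format here ... */
	return 0;
}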
Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus --- include/media/media-entity.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/media/media-entity.h b/include/media/media-entity.h index fea489f03d57..8546f13c42a9 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -858,6 +858,18 @@ struct media_link *media_entity_find_link(struct media_pad *source, */ struct media_pad *media_entity_remote_pad(const struct media_pad *pad); +/** + * media_entity_is_streaming - Test if an entity is part of a streaming pipeline + * @entity: The entity + * + * Return: True if the entity is part of a pipeline started with the + * media_pipeline_start() function, false otherwise. + */ +static inline bool media_entity_is_streaming(const struct media_entity *entity) +{ + return entity->stream_count > 0; +} + /** * media_entity_get_fwnode_pad - Get pad number from fwnode * -- cgit v1.2.3 From 3056a8e936bb090865402bfe6f3a730a28790033 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 13 Jan 2022 17:00:42 +0200 Subject: media: media-entity: Simplify media_pipeline_start() The media_pipeline_start() function has two purposes: it constructs a pipeline by recording the entities that are part of it, gathered from a graph walk, and validate the media links. The pipeline pointer is stored in the media_entity structure as part of this process, and the entity's stream count is increased, to record that the entity is streaming. When multiple video nodes are present in a pipeline, media_pipeline_start() is typically called on all of them, with the same pipeline pointer. This is taken into account in media_pipeline_start() by skipping validation for entities that are already part of the pipeline, while returning an error if an entity is part of a different pipeline. It turns out that this process is overly complicated. When media_pipeline_start() is called for the first time, it constructs the full pipeline, adding all entities and validating all the links. Subsequent calls to media_pipeline_start() are then nearly no-ops, they only increase the stream count on the pipeline and on all entities. The media_entity stream_count field is used for two purposes: checking if the entity is streaming, and detecting when a call to media_pipeline_stop() balances needs to reset the entity pipe pointer to NULL. The former can easily be replaced by a check of the pipe pointer. Simplify media_pipeline_start() by avoiding the pipeline walk on all calls but the first one, and drop the media_entity stream_count field. media_pipeline_stop() is updated accordingly. Signed-off-by: Laurent Pinchart [Sakari Ailus: Drop redundant '!= NULL' as discussed] Signed-off-by: Sakari Ailus --- include/media/media-entity.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 8546f13c42a9..33ee595c13f4 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -268,7 +268,6 @@ enum media_entity_type { * @pads: Pads array with the size defined by @num_pads. * @links: List of data links. * @ops: Entity operations. - * @stream_count: Stream count for the entity. * @use_count: Use count for the entity. * @pipe: Pipeline this entity belongs to. * @info: Union with devnode information. Kept just for backward @@ -283,10 +282,9 @@ enum media_entity_type { * * .. 
note:: * - * @stream_count and @use_count reference counts must never be - * negative, but are signed integers on purpose: a simple ``WARN_ON(<0)`` - * check can be used to detect reference count bugs that would make them - * negative. + * The @use_count reference count must never be negative, but is a signed + * integer on purpose: a simple ``WARN_ON(<0)`` check can be used to detect + * reference count bugs that would make it negative. */ struct media_entity { struct media_gobj graph_obj; /* must be first field in struct */ @@ -305,7 +303,6 @@ struct media_entity { const struct media_entity_operations *ops; - int stream_count; int use_count; struct media_pipeline *pipe; @@ -867,7 +864,7 @@ struct media_pad *media_entity_remote_pad(const struct media_pad *pad); */ static inline bool media_entity_is_streaming(const struct media_entity *entity) { - return entity->stream_count > 0; + return entity->pipe; } /** -- cgit v1.2.3 From 443bf23d0048e014065d1a7fac8144fb0a40805b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Feb 2022 14:57:47 +0200 Subject: media: media-entity: Clarify media_entity_cleanup() usage Being able to call cleanup functions on objects that haven't been initialized but whose memory has been zeroed simplifies error handling. The media_entity_cleanup() function documentation doesn't tell whether this is allowed or not, and inspection of its implementation doesn't provide any clue as the function is currently empty. Update the documentation to explicitly allow this usage pattern. Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus --- include/media/media-entity.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 33ee595c13f4..742918962d46 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -654,6 +654,10 @@ int media_entity_pads_init(struct media_entity *entity, u16 num_pads, * * This function must be called during the cleanup phase after unregistering * the entity (currently, it does nothing). + * + * Calling media_entity_cleanup() on a media_entity whose memory has been + * zeroed but that has not been initialized with media_entity_pad_init() is + * valid and is a no-op. */ #if IS_ENABLED(CONFIG_MEDIA_CONTROLLER) static inline void media_entity_cleanup(struct media_entity *entity) {} -- cgit v1.2.3 From 51ef2be546e2e480e56fdb59fdeb9a4406e8d52e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 17 Feb 2022 16:44:07 +0100 Subject: media: i2c: isl7998x: Add driver for Intersil ISL7998x Add driver for the Intersil ISL7998x Analog to MIPI CSI-2/BT656 decoder. This chip supports 1/2/4 analog video inputs and converts them into 1/2/4 VCs in MIPI CSI2 stream. This driver currently supports ISL79987 and both 720x480 and 720x576 resolutions, however as per specification, all inputs must use the same resolution and standard. The only supported pixel format is now YUYV/YUV422. The chip should support RGB565 on the CSI2 as well, but this is currently unsupported. 
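The include-side change below only reserves a control-ID range; a driver-private control would then be built on that base roughly as follows (control name and values are hypothetical, not the driver's actual controls):

#include <media/v4l2-ctrls.h>

/* Hypothetical private control carved out of the reserved range. */
#define V4L2_CID_ISL7998X_TEST_PATTERN	(V4L2_CID_USER_ISL7998X_BASE + 0)

static int isl7998x_s_ctrl(struct v4l2_ctrl *ctrl)
{
	/* Program the hardware for ctrl->id / ctrl->val here. */
	return 0;
}

static const struct v4l2_ctrl_ops isl7998x_ctrl_ops = {
	.s_ctrl = isl7998x_s_ctrl,
};

/* Registered with v4l2_ctrl_new_custom(hdl, &isl7998x_test_pattern, NULL). */
static const struct v4l2_ctrl_config isl7998x_test_pattern = {
	.ops	= &isl7998x_ctrl_ops,
	.id	= V4L2_CID_ISL7998X_TEST_PATTERN,
	.name	= "Test Pattern",
	.type	= V4L2_CTRL_TYPE_INTEGER,
	.min	= 0,
	.max	= 3,
	.step	= 1,
	.def	= 0,
};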
Signed-off-by: Marek Vasut Cc: Sakari Ailus Cc: Mauro Carvalho Chehab Cc: Rob Herring To: linux-media@vger.kernel.org Signed-off-by: Michael Tretter Acked-by: Hans Verkuil [Sakari Ailus: Always call pm_runtime_get_and_resume in pre_streamon] Signed-off-by: Sakari Ailus --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..92576ed03fc4 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -219,6 +219,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_ALLEGRO_BASE (V4L2_CID_USER_BASE + 0x1170) +/* + * The base for the isl7998x driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ISL7998X_BASE (V4L2_CID_USER_BASE + 0x1180) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From bfa26ba343c727e055223be04e08f2ebdd43c293 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:23:42 -0800 Subject: HID: add mapping for KEY_DICTATE Numerous keyboards are adding dictate keys which allows for text messages to be dictated by a microphone. This patch adds a new key definition KEY_DICTATE and maps 0x0c/0x0d8 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303021501.1.I5dbf50eb1a7a6734ee727bda4a8573358c6d3ec0@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 225ec87d4f22..4db5d41848e4 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -612,6 +612,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 327b89f0acc4c20a06ed59e4d9af7f6d804dc2e2 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:26:22 -0800 Subject: HID: add mapping for KEY_ALL_APPLICATIONS This patch adds a new key definition for KEY_ALL_APPLICATIONS and aliases KEY_DASHBOARD to it. It also maps the 0x0c/0x2a2 usage code to KEY_ALL_APPLICATIONS. 
Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303035618.1.I3a7746ad05d270161a18334ae06e3b6db1a1d339@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4db5d41848e4..7989d9483ea7 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 -- cgit v1.2.3 From b70f5cd874ccf85c20882e12ba75a61a11ce4018 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Feb 2022 01:11:29 +0100 Subject: media: noon010p30: Convert to use GPIO descriptors The noon010pc30 sensor driver is using legacy gpio numbers passed through platform data and open coding reverse polarity on the GPIOs used for reset and standby. Nothing in the kernel defines any platform data for this driver so we can just convert the driver to use GPIO descriptors and requires that these specify the correct polarity instead. Cc: Sylwester Nawrocki Cc: Hans Verkuil Signed-off-by: Linus Walleij Signed-off-by: Sakari Ailus --- include/media/i2c/noon010pc30.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/media/i2c/noon010pc30.h b/include/media/i2c/noon010pc30.h index d1b2e06a1de0..1880dad25cf0 100644 --- a/include/media/i2c/noon010pc30.h +++ b/include/media/i2c/noon010pc30.h @@ -12,14 +12,10 @@ /** * struct noon010pc30_platform_data - platform data * @clk_rate: the clock frequency in Hz - * @gpio_nreset: GPIO driving nRESET pin - * @gpio_nstby: GPIO driving nSTBY pin */ struct noon010pc30_platform_data { unsigned long clk_rate; - int gpio_nreset; - int gpio_nstby; }; #endif /* NOON010PC30_H */ -- cgit v1.2.3 From aaaf357fa61c00376cd8718d36bf06b7f0cbeead Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Feb 2022 01:13:07 +0100 Subject: media: m5mols: Convert to use GPIO descriptors The Fujitsu M5MOLS sensor driver is using a reset GPIO number passed from platform data. No machine/board descriptor file in the kernel is using this so let's replace it with a GPIO descriptor. 
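The driver-side counterpart of dropping such platform-data fields is requesting the line as a descriptor at probe time, roughly like this (a sketch with assumed function and connection-ID names, not the actual driver code):

#include <linux/delay.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>

/* Hypothetical probe-time helper: the reset line and its polarity now come
 * from the firmware node (DT/ACPI) instead of platform data.
 */
static int sensor_assert_reset(struct i2c_client *client)
{
	struct gpio_desc *reset;

	reset = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
	if (IS_ERR(reset))
		return PTR_ERR(reset);

	/* Requested asserted; hold it briefly, then release the sensor. */
	usleep_range(1000, 2000);
	gpiod_set_value_cansleep(reset, 0);

	return 0;
}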
Cc: Kyungmin Park Cc: Heungjun Kim Signed-off-by: Linus Walleij Signed-off-by: Sakari Ailus --- include/media/i2c/m5mols.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/media/i2c/m5mols.h b/include/media/i2c/m5mols.h index 9cec5a09e125..a56ae353c891 100644 --- a/include/media/i2c/m5mols.h +++ b/include/media/i2c/m5mols.h @@ -14,15 +14,11 @@ /** * struct m5mols_platform_data - platform data for M-5MOLS driver - * @gpio_reset: GPIO driving the reset pin of M-5MOLS - * @reset_polarity: active state for gpio_reset pin, 0 or 1 * @set_power: an additional callback to the board setup code * to be called after enabling and before disabling * the sensor's supply regulators */ struct m5mols_platform_data { - int gpio_reset; - u8 reset_polarity; int (*set_power)(struct device *dev, int on); }; -- cgit v1.2.3 From 2d684f4e155c1e80ff63bd503930171c460eac5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20J=C3=BCcker?= Date: Sat, 29 Jan 2022 23:01:53 +0100 Subject: drm/exynos: fimd: add BGR support for exynos4/5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the downstream kernels for exynos4 and exynos5 devices, there is an undocumented register that controls the order of the RGB output. It can be set to either normal order or reversed, which enables BGR support for those SoCs. This patch enables the BGR support for all the SoCs that were found to have at least one device with this logic in the corresponding downstream kernels. Signed-off-by: Martin Jücker Signed-off-by: Inki Dae --- include/video/samsung_fimd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/video/samsung_fimd.h b/include/video/samsung_fimd.h index c4a93ce1de48..e6966d187591 100644 --- a/include/video/samsung_fimd.h +++ b/include/video/samsung_fimd.h @@ -476,6 +476,10 @@ * 1111 -none- -none- -none- -none- -none- */ +#define WIN_RGB_ORDER(_win) (0x2020 + ((_win) * 4)) +#define WIN_RGB_ORDER_FORWARD (0 << 11) +#define WIN_RGB_ORDER_REVERSE (1 << 11) + /* FIMD Version 8 register offset definitions */ #define FIMD_V8_VIDTCON0 0x20010 #define FIMD_V8_VIDTCON1 0x20014 -- cgit v1.2.3 From 31b9887c7258ca47d9c665a80f19f006c86756b1 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 17 Jan 2022 17:48:15 +0000 Subject: i3c: remove i2c board info from i2c_dev_desc I2C board info is only required during adapter setup so there is no requirement to keeping a pointer to it once running. To support dynamic device addition we can't rely on board info - user-space creation through sysfs won't have a boardinfo. Cc: Alexandre Belloni Signed-off-by: Jamie Iles Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220117174816.1963463-2-quic_jiles@quicinc.com --- include/linux/i3c/master.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 9cb39d901cd5..604a126b78c8 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -85,7 +85,6 @@ struct i2c_dev_boardinfo { */ struct i2c_dev_desc { struct i3c_i2c_dev_desc common; - const struct i2c_dev_boardinfo *boardinfo; struct i2c_client *dev; u16 addr; u8 lvr; -- cgit v1.2.3 From 98b4d7a4e7374a44c4afd9f08330e72f6ad0d644 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:40 +0800 Subject: net: dev: use kfree_skb_reason() for sch_handle_egress() Replace kfree_skb() used in sch_handle_egress() with kfree_skb_reason(). 
The drop reason SKB_DROP_REASON_TC_EGRESS is introduced. Considering the code path of tc egerss, we make it distinct with the drop reason of SKB_DROP_REASON_QDISC_DROP in the next commit. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5445860e1ba6..1ffe64616741 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -394,6 +394,7 @@ enum skb_drop_reason { * entry is full */ SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ + SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 1977f301260d..53755e8191a1 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -45,6 +45,7 @@ EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ + EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 215b0f1963d4e34fccac6992b3debe26f78a6eb8 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:41 +0800 Subject: net: skb: introduce the function kfree_skb_list_reason() To report reasons of skb drops, introduce the function kfree_skb_list_reason() and make kfree_skb_list() an inline call to it. This function will be used in the next commit in __dev_xmit_skb(). Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1ffe64616741..1f2de153755c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1202,10 +1202,16 @@ static inline void kfree_skb(struct sk_buff *skb) } void skb_release_head_state(struct sk_buff *skb); -void kfree_skb_list(struct sk_buff *segs); +void kfree_skb_list_reason(struct sk_buff *segs, + enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); +static inline void kfree_skb_list(struct sk_buff *segs) +{ + kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); +} + #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); #else -- cgit v1.2.3 From 7faef0547f4c29031a68d058918b031a8e520d49 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:42 +0800 Subject: net: dev: add skb drop reasons to __dev_xmit_skb() Add reasons for skb drops to __dev_xmit_skb() by replacing kfree_skb_list() with kfree_skb_list_reason(). The drop reason of SKB_DROP_REASON_QDISC_DROP is introduced for qdisc enqueue fails. Signed-off-by: Menglong Dong Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++++ include/trace/events/skb.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f2de153755c..769d54ba2f3b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -395,6 +395,10 @@ enum skb_drop_reason { */ SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ + SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet + * outputting (failed to enqueue to + * current qdisc) + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 53755e8191a1..dbf3e2e3c1b4 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -46,6 +46,7 @@ EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ + EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 44f0bd40803c0e04f1c8cd59df3c7acce783ae9c Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:43 +0800 Subject: net: dev: use kfree_skb_reason() for enqueue_to_backlog() Replace kfree_skb() used in enqueue_to_backlog() with kfree_skb_reason(). The skb drop reason SKB_DROP_REASON_CPU_BACKLOG is introduced for the case of failing to enqueue the skb to the per CPU backlog queue. The further reason can be backlog queue full or RPS flow limitation, and I think we needn't make further distinctions. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ include/trace/events/skb.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 769d54ba2f3b..44ecbfff2b69 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -399,6 +399,12 @@ enum skb_drop_reason { * outputting (failed to enqueue to * current qdisc) */ + SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to + * the per CPU backlog queue. This + * can be caused by backlog queue + * full (see netdev_max_backlog in + * net.rst) or RPS flow limit + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index dbf3e2e3c1b4..3bb90ca893ae 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -47,6 +47,7 @@ EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ + EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 7e726ed81e1ddd5fdc431e02b94fcfe2a9876d42 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:44 +0800 Subject: net: dev: use kfree_skb_reason() for do_xdp_generic() Replace kfree_skb() used in do_xdp_generic() with kfree_skb_reason(). The drop reason SKB_DROP_REASON_XDP is introduced for this case. Signed-off-by: Menglong Dong Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 44ecbfff2b69..7a38d0f90cef 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -405,6 +405,7 @@ enum skb_drop_reason { * full (see netdev_max_backlog in * net.rst) or RPS flow limit */ + SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3bb90ca893ae..8c4c343c830f 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -48,6 +48,7 @@ EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ + EM(SKB_DROP_REASON_XDP, XDP) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From a568aff26ac03ee9eb1482683514914a5ec3b4c3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:45 +0800 Subject: net: dev: use kfree_skb_reason() for sch_handle_ingress() Replace kfree_skb() used in sch_handle_ingress() with kfree_skb_reason(). Following drop reasons are introduced: SKB_DROP_REASON_TC_INGRESS Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7a38d0f90cef..6b14b2d297d2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -406,6 +406,7 @@ enum skb_drop_reason { * net.rst) or RPS flow limit */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ + SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 8c4c343c830f..514dd2de8776 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -49,6 +49,7 @@ EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EM(SKB_DROP_REASON_XDP, XDP) \ + EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 6c2728b7c14164928cb7cb9c847dead101b2d503 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 4 Mar 2022 14:00:46 +0800 Subject: net: dev: use kfree_skb_reason() for __netif_receive_skb_core() Add reason for skb drops to __netif_receive_skb_core() when packet_type not found to handle the skb. For this purpose, the drop reason SKB_DROP_REASON_PTYPE_ABSENT is introduced. Take ether packets for example, this case mainly happens when L3 protocol is not supported. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ include/trace/events/skb.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b14b2d297d2..2be263184d1e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -407,6 +407,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ + SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle + * the skb. 
For an etner packet, + * this means that L3 protocol is + * not supported + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 514dd2de8776..c0769d943f8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -50,6 +50,7 @@ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ + EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 838d6d3461db0fdbf33fc5f8a69c27b50b4a46da Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 14 Jan 2022 14:56:15 -0500 Subject: virtio: unexport virtio_finalize_features virtio_finalize_features is only used internally within virtio. No reason to export it. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: Jason Wang --- include/linux/virtio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 72292a62cd90..5464f398912a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); -int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); -- cgit v1.2.3 From 4fa59ede95195f267101a1b8916992cf3f245cdb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 14 Jan 2022 14:58:41 -0500 Subject: virtio: acknowledge all features before access The feature negotiation was designed in a way that makes it possible for devices to know which config fields will be accessed by drivers. This is broken since commit 404123c2db79 ("virtio: allow drivers to validate features") with fallout in at least block and net. We have a partial work-around in commit 2f9a174f918e ("virtio: write back F_VERSION_1 before validate") which at least lets devices find out which format should config space have, but this is a partial fix: guests should not access config space without acknowledging features since otherwise we'll never be able to change the config space format. To fix, split finalize_features from virtio_finalize_features and call finalize_features with all feature bits before validation, and then - if validation changed any bits - once again after. Since virtio_finalize_features no longer writes out features rename it to virtio_features_ok - since that is what it does: checks that features are ok with the device. As a side effect, this also reduces the amount of hypervisor accesses - we now only acknowledge features once unless we are clearing any features when validating (which is uncommon). IRC I think that this was more or less always the intent in the spec but unfortunately the way the spec is worded does not say this explicitly, I plan to address this at the spec level, too. Acked-by: Jason Wang Cc: stable@vger.kernel.org Fixes: 404123c2db79 ("virtio: allow drivers to validate features") Fixes: 2f9a174f918e ("virtio: write back F_VERSION_1 before validate") Cc: "Halil Pasic" Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4d107ad31149..dafdc7f48c01 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -64,8 +64,9 @@ struct virtio_shm_region { * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * This gives the final feature bits for the device: it can change + * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. + * Note: despite the name this can be called any number of times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device -- cgit v1.2.3 From bf9ad37dc8a30cce22ae95d6c2ca6abf8731d305 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: signal, x86: Delay calling signals in atomic on RT enabled kernels On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using a per CPU debug stack defined by the IST. If we schedule out, another task can come in and use the same stack and cause the stack to be corrupted and crash the kernel on return. When CONFIG_PREEMPT_RT is enabled, spinlock_t locks become sleeping, and one of these is the spin lock used in signal handling. Some of the debug code (int3) causes do_trap() to send a signal. This function calls a spinlock_t lock that has been converted to a sleeping lock. If this happens, the above issues with the corrupted stack is possible. Instead of calling the signal right away, for PREEMPT_RT and x86, the signal information is stored on the stacks task_struct and TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume code will send the signal when preemption is enabled. [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ] [bigeasy: Add on 32bit as per Yang Shi, minor rewording. ] [ tglx: Use a config option ] Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/Ygq5aBB/qMQw6aP5@linutronix.de --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..098e37fd770a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1087,6 +1087,9 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; +#ifdef CONFIG_RT_DELAYED_SIGNALS + struct kernel_siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; -- cgit v1.2.3 From 92499dec3aa9c251e605b42e1024e805bbaa50ad Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 11 Jan 2022 15:05:56 +0200 Subject: ARM: dts: at91: sama7g5: Add NAND support Add NAND support. The sama7g5's SMC IP is the same as sama5d2's with a slightly change: it provides a synchronous clock output (SMC clock) that is dedicated to FPGA usage. 
Since this doesn't interfere with the SMC NAND configuration, thus code will not be added in the current nand driver to address the FPGA usage, use the sama5d2's compatible and choose not to introduce dedicated compatibles for sama7g5. Tested with Micron MT29F4G08ABAEAWP NAND flash. Signed-off-by: Tudor Ambarus [nicolas.ferre@microchip.com: add the definition of PMC_MCK1 in include/dt-bindings/clock/at91.h from another patch] Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220111130556.905978-1-tudor.ambarus@microchip.com --- include/dt-bindings/clock/at91.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 98e1b2ab6403..8498c0cd95fe 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -35,6 +35,7 @@ #define PMC_AUDIOIOPLL (PMC_MAIN + 7) #define PMC_ETHPLL (PMC_MAIN + 8) #define PMC_CPU (PMC_MAIN + 9) +#define PMC_MCK1 (PMC_MAIN + 10) #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ -- cgit v1.2.3 From 9a45081bb1fd5d15253994fcdbed652a56174d5e Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 28 Feb 2022 10:09:05 +0800 Subject: dt-bindings: clock: add i.MX93 clock definition Add i.MX93 clock definition Reviewed-by: Abel Vesa Acked-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Stephen Boyd Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20220228020908.2810346-3-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imx93-clock.h | 201 ++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 include/dt-bindings/clock/imx93-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h new file mode 100644 index 000000000000..21fda9c5cb5e --- /dev/null +++ b/include/dt-bindings/clock/imx93-clock.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR MIT */ +/* + * Copyright 2022 NXP + */ + +#ifndef __DT_BINDINGS_CLOCK_IMX93_CLK_H +#define __DT_BINDINGS_CLOCK_IMX93_CLK_H + +#define IMX93_CLK_DUMMY 0 +#define IMX93_CLK_24M 1 +#define IMX93_CLK_EXT1 2 +#define IMX93_CLK_SYS_PLL_PFD0 3 +#define IMX93_CLK_SYS_PLL_PFD0_DIV2 4 +#define IMX93_CLK_SYS_PLL_PFD1 5 +#define IMX93_CLK_SYS_PLL_PFD1_DIV2 6 +#define IMX93_CLK_SYS_PLL_PFD2 7 +#define IMX93_CLK_SYS_PLL_PFD2_DIV2 8 +#define IMX93_CLK_AUDIO_PLL 9 +#define IMX93_CLK_VIDEO_PLL 10 +#define IMX93_CLK_A55_PERIPH 11 +#define IMX93_CLK_A55_MTR_BUS 12 +#define IMX93_CLK_A55 13 +#define IMX93_CLK_M33 14 +#define IMX93_CLK_BUS_WAKEUP 15 +#define IMX93_CLK_BUS_AON 16 +#define IMX93_CLK_WAKEUP_AXI 17 +#define IMX93_CLK_SWO_TRACE 18 +#define IMX93_CLK_M33_SYSTICK 19 +#define IMX93_CLK_FLEXIO1 20 +#define IMX93_CLK_FLEXIO2 21 +#define IMX93_CLK_LPIT1 22 +#define IMX93_CLK_LPIT2 23 +#define IMX93_CLK_LPTMR1 24 +#define IMX93_CLK_LPTMR2 25 +#define IMX93_CLK_TPM1 26 +#define IMX93_CLK_TPM2 27 +#define IMX93_CLK_TPM3 28 +#define IMX93_CLK_TPM4 29 +#define IMX93_CLK_TPM5 30 +#define IMX93_CLK_TPM6 31 +#define IMX93_CLK_FLEXSPI1 32 +#define IMX93_CLK_CAN1 33 +#define IMX93_CLK_CAN2 34 +#define IMX93_CLK_LPUART1 35 +#define IMX93_CLK_LPUART2 36 +#define IMX93_CLK_LPUART3 37 +#define IMX93_CLK_LPUART4 38 +#define IMX93_CLK_LPUART5 39 +#define IMX93_CLK_LPUART6 40 +#define IMX93_CLK_LPUART7 41 +#define IMX93_CLK_LPUART8 42 +#define IMX93_CLK_LPI2C1 43 +#define IMX93_CLK_LPI2C2 44 +#define IMX93_CLK_LPI2C3 45 +#define IMX93_CLK_LPI2C4 46 +#define IMX93_CLK_LPI2C5 47 
+#define IMX93_CLK_LPI2C6 48 +#define IMX93_CLK_LPI2C7 49 +#define IMX93_CLK_LPI2C8 50 +#define IMX93_CLK_LPSPI1 51 +#define IMX93_CLK_LPSPI2 52 +#define IMX93_CLK_LPSPI3 53 +#define IMX93_CLK_LPSPI4 54 +#define IMX93_CLK_LPSPI5 55 +#define IMX93_CLK_LPSPI6 56 +#define IMX93_CLK_LPSPI7 57 +#define IMX93_CLK_LPSPI8 58 +#define IMX93_CLK_I3C1 59 +#define IMX93_CLK_I3C2 60 +#define IMX93_CLK_USDHC1 61 +#define IMX93_CLK_USDHC2 62 +#define IMX93_CLK_USDHC3 63 +#define IMX93_CLK_SAI1 64 +#define IMX93_CLK_SAI2 65 +#define IMX93_CLK_SAI3 66 +#define IMX93_CLK_CCM_CKO1 67 +#define IMX93_CLK_CCM_CKO2 68 +#define IMX93_CLK_CCM_CKO3 69 +#define IMX93_CLK_CCM_CKO4 70 +#define IMX93_CLK_HSIO 71 +#define IMX93_CLK_HSIO_USB_TEST_60M 72 +#define IMX93_CLK_HSIO_ACSCAN_80M 73 +#define IMX93_CLK_HSIO_ACSCAN_480M 74 +#define IMX93_CLK_ML_APB 75 +#define IMX93_CLK_ML 76 +#define IMX93_CLK_MEDIA_AXI 77 +#define IMX93_CLK_MEDIA_APB 78 +#define IMX93_CLK_MEDIA_LDB 79 +#define IMX93_CLK_MEDIA_DISP_PIX 80 +#define IMX93_CLK_CAM_PIX 81 +#define IMX93_CLK_MIPI_TEST_BYTE 82 +#define IMX93_CLK_MIPI_PHY_CFG 83 +#define IMX93_CLK_ADC 84 +#define IMX93_CLK_PDM 85 +#define IMX93_CLK_TSTMR1 86 +#define IMX93_CLK_TSTMR2 87 +#define IMX93_CLK_MQS1 88 +#define IMX93_CLK_MQS2 89 +#define IMX93_CLK_AUDIO_XCVR 90 +#define IMX93_CLK_SPDIF 91 +#define IMX93_CLK_ENET 92 +#define IMX93_CLK_ENET_TIMER1 93 +#define IMX93_CLK_ENET_TIMER2 94 +#define IMX93_CLK_ENET_REF 95 +#define IMX93_CLK_ENET_REF_PHY 96 +#define IMX93_CLK_I3C1_SLOW 97 +#define IMX93_CLK_I3C2_SLOW 98 +#define IMX93_CLK_USB_PHY_BURUNIN 99 +#define IMX93_CLK_PAL_CAME_SCAN 100 +#define IMX93_CLK_A55_GATE 101 +#define IMX93_CLK_CM33_GATE 102 +#define IMX93_CLK_ADC1_GATE 103 +#define IMX93_CLK_WDOG1_GATE 104 +#define IMX93_CLK_WDOG2_GATE 105 +#define IMX93_CLK_WDOG3_GATE 106 +#define IMX93_CLK_WDOG4_GATE 107 +#define IMX93_CLK_WDOG5_GATE 108 +#define IMX93_CLK_SEMA1_GATE 109 +#define IMX93_CLK_SEMA2_GATE 110 +#define IMX93_CLK_MU_A_GATE 111 +#define IMX93_CLK_MU_B_GATE 112 +#define IMX93_CLK_EDMA1_GATE 113 +#define IMX93_CLK_EDMA2_GATE 114 +#define IMX93_CLK_FLEXSPI1_GATE 115 +#define IMX93_CLK_GPIO1_GATE 116 +#define IMX93_CLK_GPIO2_GATE 117 +#define IMX93_CLK_GPIO3_GATE 118 +#define IMX93_CLK_GPIO4_GATE 119 +#define IMX93_CLK_FLEXIO1_GATE 120 +#define IMX93_CLK_FLEXIO2_GATE 121 +#define IMX93_CLK_LPIT1_GATE 122 +#define IMX93_CLK_LPIT2_GATE 123 +#define IMX93_CLK_LPTMR1_GATE 124 +#define IMX93_CLK_LPTMR2_GATE 125 +#define IMX93_CLK_TPM1_GATE 126 +#define IMX93_CLK_TPM2_GATE 127 +#define IMX93_CLK_TPM3_GATE 128 +#define IMX93_CLK_TPM4_GATE 129 +#define IMX93_CLK_TPM5_GATE 130 +#define IMX93_CLK_TPM6_GATE 131 +#define IMX93_CLK_CAN1_GATE 132 +#define IMX93_CLK_CAN2_GATE 133 +#define IMX93_CLK_LPUART1_GATE 134 +#define IMX93_CLK_LPUART2_GATE 135 +#define IMX93_CLK_LPUART3_GATE 136 +#define IMX93_CLK_LPUART4_GATE 137 +#define IMX93_CLK_LPUART5_GATE 138 +#define IMX93_CLK_LPUART6_GATE 139 +#define IMX93_CLK_LPUART7_GATE 140 +#define IMX93_CLK_LPUART8_GATE 141 +#define IMX93_CLK_LPI2C1_GATE 142 +#define IMX93_CLK_LPI2C2_GATE 143 +#define IMX93_CLK_LPI2C3_GATE 144 +#define IMX93_CLK_LPI2C4_GATE 145 +#define IMX93_CLK_LPI2C5_GATE 146 +#define IMX93_CLK_LPI2C6_GATE 147 +#define IMX93_CLK_LPI2C7_GATE 148 +#define IMX93_CLK_LPI2C8_GATE 149 +#define IMX93_CLK_LPSPI1_GATE 150 +#define IMX93_CLK_LPSPI2_GATE 151 +#define IMX93_CLK_LPSPI3_GATE 152 +#define IMX93_CLK_LPSPI4_GATE 153 +#define IMX93_CLK_LPSPI5_GATE 154 +#define IMX93_CLK_LPSPI6_GATE 155 +#define IMX93_CLK_LPSPI7_GATE 156 
+#define IMX93_CLK_LPSPI8_GATE 157 +#define IMX93_CLK_I3C1_GATE 158 +#define IMX93_CLK_I3C2_GATE 159 +#define IMX93_CLK_USDHC1_GATE 160 +#define IMX93_CLK_USDHC2_GATE 161 +#define IMX93_CLK_USDHC3_GATE 162 +#define IMX93_CLK_SAI1_GATE 163 +#define IMX93_CLK_SAI2_GATE 164 +#define IMX93_CLK_SAI3_GATE 165 +#define IMX93_CLK_MIPI_CSI_GATE 166 +#define IMX93_CLK_MIPI_DSI_GATE 167 +#define IMX93_CLK_LVDS_GATE 168 +#define IMX93_CLK_LCDIF_GATE 169 +#define IMX93_CLK_PXP_GATE 170 +#define IMX93_CLK_ISI_GATE 171 +#define IMX93_CLK_NIC_MEDIA_GATE 172 +#define IMX93_CLK_USB_CONTROLLER_GATE 173 +#define IMX93_CLK_USB_TEST_60M_GATE 174 +#define IMX93_CLK_HSIO_TROUT_24M_GATE 175 +#define IMX93_CLK_PDM_GATE 176 +#define IMX93_CLK_MQS1_GATE 177 +#define IMX93_CLK_MQS2_GATE 178 +#define IMX93_CLK_AUD_XCVR_GATE 179 +#define IMX93_CLK_SPDIF_GATE 180 +#define IMX93_CLK_HSIO_32K_GATE 181 +#define IMX93_CLK_ENET1_GATE 182 +#define IMX93_CLK_ENET_QOS_GATE 183 +#define IMX93_CLK_SYS_CNT_GATE 184 +#define IMX93_CLK_TSTMR1_GATE 185 +#define IMX93_CLK_TSTMR2_GATE 186 +#define IMX93_CLK_TMC_GATE 187 +#define IMX93_CLK_PMRO_GATE 188 +#define IMX93_CLK_32K 189 +#define IMX93_CLK_END 190 + +#endif -- cgit v1.2.3 From 3a0318140a6f8c3ab60f1f46c3f203923cb01882 Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Fri, 21 Jan 2022 01:35:08 +0000 Subject: Bluetooth: mgmt: Replace zero-length array with flexible-array member There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use "flexible array members" for these cases. The older style of one-element or zero-length arrays should no longer be used. Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 99266f7aebdc..3d26e6a3478b 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -1112,7 +1112,7 @@ struct mgmt_ev_adv_monitor_device_found { __s8 rssi; __le32 flags; __le16 eir_len; - __u8 eir[0]; + __u8 eir[]; } __packed; #define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030 -- cgit v1.2.3 From 9b392e0e0b6d026da5a62bb79a08f32e27af858e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 3 Mar 2022 13:11:57 -0800 Subject: Bluetooth: Fix not checking for valid hdev on bt_dev_{info,warn,err,dbg} This fixes attemting to print hdev->name directly which causes them to print an error: kernel: read_version:367: (efault): sock 000000006a3008f2 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index a647e5fabdbd..2aa5e95808a5 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -204,19 +204,21 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif +#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + #define bt_dev_info(hdev, fmt, ...) \ - BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) 
\ - BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ - BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) \ - BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ - bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) \ - bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { -- cgit v1.2.3 From 402e6688a7df482cc57be0b0dd5b443d995073fb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:48 +0800 Subject: iommu/vt-d: Remove intel_iommu::domains The "domains" field of the intel_iommu structure keeps the mapping of domain_id to dmar_domain. This information is not used anywhere. Remove and cleanup it to avoid unnecessary memory consumption. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 5cfda90b2cca..8c7591b5f3e2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -578,7 +578,6 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain ***domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ -- cgit v1.2.3 From 586081d3f6b13ec9dfdfdf3d7842a688b376fa5e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:52 +0800 Subject: iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO Allocate and set the per-device iommu private data during iommu device probe. This makes the per-device iommu private data always available during iommu_probe_device() and iommu_release_device(). With this changed, the dummy DEFER_DEVICE_DOMAIN_INFO pointer could be removed. The wrappers for getting the private data and domain are also cleaned. 
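As an illustrative sketch (not part of the patch itself): with the private data allocated at probe time, the shape of the new path is roughly the following, assuming the generic dev_iommu_priv_set()/dev_iommu_priv_get() helpers from <linux/iommu.h>; the function names are condensed for the example and error handling is trimmed.

    static struct iommu_device *example_probe_device(struct device *dev)
    {
            struct device_domain_info *info;
            struct intel_iommu *iommu;
            u8 bus, devfn;

            iommu = device_to_iommu(dev, &bus, &devfn);
            if (!iommu)
                    return ERR_PTR(-ENODEV);

            info = kzalloc(sizeof(*info), GFP_KERNEL);
            if (!info)
                    return ERR_PTR(-ENOMEM);

            info->dev = dev;
            info->iommu = iommu;
            dev_iommu_priv_set(dev, info);  /* valid until release_device() */

            return &iommu->iommu;
    }

    static void example_release_device(struct device *dev)
    {
            struct device_domain_info *info = dev_iommu_priv_get(dev);

            dev_iommu_priv_set(dev, NULL);
            kfree(info);
    }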
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 8c7591b5f3e2..03f1134fc2fe 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -733,8 +733,6 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); -struct dmar_domain *find_domain(struct device *dev); -struct device_domain_info *get_domain_info(struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM -- cgit v1.2.3 From 2852631d96a643e0b21a9b14ad38caf465d7225f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 1 Mar 2022 10:01:56 +0800 Subject: iommu/vt-d: Move intel_iommu_ops to header file Compiler is not happy about hidden declaration of intel_iommu_ops. .../iommu.c:414:24: warning: symbol 'intel_iommu_ops' was not declared. Should it be static? Move declaration to header file to make compiler happy. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220207141240.8253-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 03f1134fc2fe..4909d6c9ac21 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -783,6 +783,8 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +extern const struct iommu_ops intel_iommu_ops; + #ifdef CONFIG_INTEL_IOMMU extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -- cgit v1.2.3 From 97f2f2c5317f55ae3440733a090a96a251da222b Mon Sep 17 00:00:00 2001 From: Yian Chen Date: Tue, 1 Mar 2022 10:01:59 +0800 Subject: iommu/vt-d: Enable ATS for the devices in SATC table Starting from Intel VT-d v3.2, Intel platform BIOS can provide additional SATC table structure. SATC table includes a list of SoC integrated devices that support ATC (Address translation cache). Enabling ATC (via ATS capability) can be a functional requirement for SATC device operation or optional to enhance device performance/functionality. This is determined by the bit of ATC_REQUIRED in SATC table. When IOMMU is working in scalable mode, software chooses to always enable ATS for every device in SATC table because Intel SoC devices in SATC table are trusted to use ATS. On the other hand, if IOMMU is in legacy mode, ATS of SATC capable devices can work transparently to software and be automatically enabled by IOMMU hardware. As the result, there is no need for software to enable ATS on these devices. This also removes dmar_find_matched_atsr_unit() helper as it becomes dead code now. 
Signed-off-by: Yian Chen Link: https://lore.kernel.org/r/20220222185416.1722611-1-yian.chen@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4909d6c9ac21..2f9891cb3d00 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -693,7 +693,6 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); -- cgit v1.2.3 From 26c9da51949916b73d995a2c89412c346903273d Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 16 Feb 2022 13:42:23 +0530 Subject: remoteproc: Introduce sysfs_read_only flag The remoteproc framework provides sysfs interfaces for changing the firmware name and for starting/stopping a remote processor through the sysfs files 'state' and 'firmware'. The 'coredump' file is used to set the coredump configuration. The 'recovery' sysfs file can also be used similarly to control the error recovery state machine of a remoteproc. These interfaces are currently allowed irrespective of how the remoteprocs were booted (like remoteproc self auto-boot, remoteproc client-driven boot etc). These interfaces can adversely affect a remoteproc and its clients especially when a remoteproc is being controlled by a remoteproc client driver(s). Also, not all remoteproc drivers may want to support the sysfs interfaces by default. Add support to make the remoteproc sysfs files read only by introducing a state flag 'sysfs_read_only' that the individual remoteproc drivers can set based on their usage needs. The default behavior is to allow the sysfs operations as before. Implement attribute_group->is_visible() to make the sysfs entries read only when 'sysfs_read_only' flag is set. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20220216081224.9956-2-p-mohan@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e0600e1e5c17..93a1d0050fbc 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -523,6 +523,7 @@ struct rproc_dump_segment { * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU * @auto_boot: flag to indicate if remote processor should be auto-started + * @sysfs_read_only: flag to make remoteproc sysfs files read only * @dump_segments: list of segments in the firmware * @nb_vdev: number of vdev currently handled by rproc * @elf_class: firmware ELF class @@ -562,6 +563,7 @@ struct rproc { size_t table_sz; bool has_iommu; bool auto_boot; + bool sysfs_read_only; struct list_head dump_segments; int nb_vdev; u8 elf_class; -- cgit v1.2.3 From e0077cc13b831f8fad5557442f73bf7728683713 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 14 Jan 2022 19:27:59 -0500 Subject: vdpa: factor out vdpa_set_features_unlocked for vdpa internal use No functional change introduced. vdpa bus driver such as virtio_vdpa or vhost_vdpa is not supposed to take care of the locking for core by its own. 
The locked API vdpa_set_features should suffice the bus driver's need. Signed-off-by: Si-Wei Liu Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/1642206481-30721-2-git-send-email-si-wei.liu@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/vdpa.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2de442ececae..721089bb4c84 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev) return ret; } -static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked) +static inline int vdpa_set_features_unlocked(struct vdpa_device *vdev, u64 features) { const struct vdpa_config_ops *ops = vdev->config; int ret; - if (!locked) - mutex_lock(&vdev->cf_mutex); - vdev->features_valid = true; ret = ops->set_driver_features(vdev, features); - if (!locked) - mutex_unlock(&vdev->cf_mutex); + + return ret; +} + +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + int ret; + + mutex_lock(&vdev->cf_mutex); + ret = vdpa_set_features_unlocked(vdev, features); + mutex_unlock(&vdev->cf_mutex); return ret; } -- cgit v1.2.3 From b4060db9251f919506e4d672737c6b8ab9a84701 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 23 Feb 2022 08:34:48 -0800 Subject: PM: runtime: Have devm_pm_runtime_enable() handle pm_runtime_dont_use_autosuspend() The PM Runtime docs say: Drivers in ->remove() callback should undo the runtime PM changes done in ->probe(). Usually this means calling pm_runtime_disable(), pm_runtime_dont_use_autosuspend() etc. From grepping code, it's clear that many people aren't aware of the need to call pm_runtime_dont_use_autosuspend(). When brainstorming solutions, one idea that came up was to leverage the new-ish devm_pm_runtime_enable() function. The idea here is that: * When the devm action is called we know that the driver is being removed. It's the perfect time to undo the use_autosuspend. * The code of pm_runtime_dont_use_autosuspend() already handles the case of being called when autosuspend wasn't enabled. Suggested-by: Laurent Pinchart Signed-off-by: Douglas Anderson Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9f09601c465a..2bff6a10095d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev) * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). + * + * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend() + * at driver exit time unless your driver initially enabled pm_runtime + * with devm_pm_runtime_enable() (which handles it for you). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { -- cgit v1.2.3 From f7ddbf5581b474fe4a0a29244acaa1bf72234675 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 3 Mar 2022 16:52:15 -0800 Subject: drm/msm: Add SET_PARAM ioctl It was always expected to have a use for this some day, so we left a placeholder. Now we do. (And I expect another use in the not too distant future when we start allowing userspace to allocate GPU iova.) 
Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220304005317.776110-3-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6b8fffc28a50..cf5de53836e7 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -67,16 +67,20 @@ struct drm_msm_timespec { __s64 tv_nsec; /* nanoseconds */ }; -#define MSM_PARAM_GPU_ID 0x01 -#define MSM_PARAM_GMEM_SIZE 0x02 -#define MSM_PARAM_CHIP_ID 0x03 -#define MSM_PARAM_MAX_FREQ 0x04 -#define MSM_PARAM_TIMESTAMP 0x05 -#define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_PRIORITIES 0x07 /* The # of priority levels */ -#define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */ -#define MSM_PARAM_FAULTS 0x09 -#define MSM_PARAM_SUSPENDS 0x0a +/* Below "RO" indicates a read-only param, "WO" indicates write-only, and + * "RW" indicates a param that can be both read (GET_PARAM) and written + * (SET_PARAM) + */ +#define MSM_PARAM_GPU_ID 0x01 /* RO */ +#define MSM_PARAM_GMEM_SIZE 0x02 /* RO */ +#define MSM_PARAM_CHIP_ID 0x03 /* RO */ +#define MSM_PARAM_MAX_FREQ 0x04 /* RO */ +#define MSM_PARAM_TIMESTAMP 0x05 /* RO */ +#define MSM_PARAM_GMEM_BASE 0x06 /* RO */ +#define MSM_PARAM_PRIORITIES 0x07 /* RO: The # of priority levels */ +#define MSM_PARAM_PP_PGTABLE 0x08 /* RO: Deprecated, always returns zero */ +#define MSM_PARAM_FAULTS 0x09 /* RO */ +#define MSM_PARAM_SUSPENDS 0x0a /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # @@ -333,9 +337,7 @@ struct drm_msm_submitqueue_query { }; #define DRM_MSM_GET_PARAM 0x00 -/* placeholder: #define DRM_MSM_SET_PARAM 0x01 - */ #define DRM_MSM_GEM_NEW 0x02 #define DRM_MSM_GEM_INFO 0x03 #define DRM_MSM_GEM_CPU_PREP 0x04 @@ -351,6 +353,7 @@ struct drm_msm_submitqueue_query { #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) #define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) #define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) -- cgit v1.2.3 From 90f45c42d7d7b0ec0fd797485c07fc421c474e12 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 3 Mar 2022 16:52:16 -0800 Subject: drm/msm: Add SYSPROF param (v2) Add a SYSPROF param for system profiling tools like Mesa's pps-producer (perfetto) to control behavior related to system-wide performance counter collection. In particular, for profiling, one wants to ensure that GPU context switches do not effect perfcounter state, and might want to suppress suspend (which would cause counters to lose state). v2: Swap the order in msm_file_private_set_sysprof() [sboyd] and initialize the sysprof_active refcount to one (because the under/ overflow checking in refcount_t doesn't expect a 0->1 transition) meaning that values greater than 1 means sysprof is active. 
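As a hedged illustration (not part of the patch) of how a profiler such as Mesa's pps-producer might drive this from userspace, assuming libdrm's drmIoctl() and the UAPI shown below; the helper name is invented for the example.

    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/msm_drm.h>

    static int msm_set_sysprof(int fd, uint64_t mode)
    {
            struct drm_msm_param req = {
                    .pipe  = MSM_PIPE_3D0,
                    .param = MSM_PARAM_SYSPROF,
                    .value = mode,  /* 1: preserve perfcntrs, 2: also hold off suspend */
            };

            return drmIoctl(fd, DRM_IOCTL_MSM_SET_PARAM, &req);
    }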
Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220304005317.776110-4-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index cf5de53836e7..2ee03ba08681 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -81,6 +81,7 @@ struct drm_msm_timespec { #define MSM_PARAM_PP_PGTABLE 0x08 /* RO: Deprecated, always returns zero */ #define MSM_PARAM_FAULTS 0x09 /* RO */ #define MSM_PARAM_SUSPENDS 0x0a /* RO */ +#define MSM_PARAM_SYSPROF 0x0b /* WO: 1 preserves perfcntrs, 2 also disables suspend */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 92c45b63ce22c8898aa41806e8d6692bcd577510 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 6 Aug 2020 16:14:55 +1200 Subject: PCI: Reduce warnings on possible RW1C corruption For hardware that only supports 32-bit writes to PCI there is the possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited messages was introduced by fb2659230120, but rate-limiting is not the best choice here. Some devices may not show the warnings they should if another device has just produced a bunch of warnings. Also, the number of messages can be a nuisance on devices which are otherwise working fine. Change the ratelimit to a single warning per bus. This ensures no bus is 'starved' of emitting a warning and also that there isn't a continuous stream of warnings. It would be preferable to have a warning per device, but the pci_dev structure is not available here, and a lookup from devfn would be far too slow. Suggested-by: Bjorn Helgaas Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit config writes") Link: https://lore.kernel.org/r/20200806041455.11070-1-mark.tomlinson@alliedtelesis.co.nz Signed-off-by: Mark Tomlinson Signed-off-by: Bjorn Helgaas Reviewed-by: Florian Fainelli Reviewed-by: Rob Herring Acked-by: Scott Branden --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c..678fecdf6b81 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -668,6 +668,7 @@ struct pci_bus { struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; + unsigned int unsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) -- cgit v1.2.3 From 17154addc5c1a175bcf3441ff0d9598efa1f05cd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 24 Feb 2022 14:23:21 -0800 Subject: drm/msm: Add MSM_SUBMIT_FENCE_SN_IN Add a way for userspace to specify the sequence number fence used to track completion of the submit. As the seqno fence is simply an incrementing counter which is local to the submitqueue, it is easy for userspace to know the next value. This is useful for native userspace drivers in a vm guest, as the guest to host roundtrip can have high latency. Assigning the fence seqno in the guest userspace allows the guest to continue without waiting for response from the host. 
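An illustrative fragment (not from the patch) of the guest userspace side, assuming <drm/msm_drm.h>, the existing queueid field of the submit struct, and a caller-maintained per-submitqueue counter; only the fields relevant to the new flag are shown.

    static void example_fill_submit(struct drm_msm_gem_submit *submit,
                                    uint32_t queue_id, uint32_t *next_seqno)
    {
            submit->flags   = MSM_PIPE_3D0 | MSM_SUBMIT_FENCE_SN_IN;
            submit->queueid = queue_id;
            submit->fence   = (*next_seqno)++;  /* in: seqno chosen by userspace */
    }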
Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220224222321.60653-1-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2ee03ba08681..07efc8033492 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -232,6 +232,7 @@ struct drm_msm_gem_submit_bo { #define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ #define MSM_SUBMIT_SYNCOBJ_IN 0x08000000 /* enable input syncobj */ #define MSM_SUBMIT_SYNCOBJ_OUT 0x04000000 /* enable output syncobj */ +#define MSM_SUBMIT_FENCE_SN_IN 0x02000000 /* userspace passes in seqno fence */ #define MSM_SUBMIT_FLAGS ( \ MSM_SUBMIT_NO_IMPLICIT | \ MSM_SUBMIT_FENCE_FD_IN | \ @@ -239,6 +240,7 @@ struct drm_msm_gem_submit_bo { MSM_SUBMIT_SUDO | \ MSM_SUBMIT_SYNCOBJ_IN | \ MSM_SUBMIT_SYNCOBJ_OUT | \ + MSM_SUBMIT_FENCE_SN_IN | \ 0) #define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ @@ -258,7 +260,7 @@ struct drm_msm_gem_submit_syncobj { */ struct drm_msm_gem_submit { __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ - __u32 fence; /* out */ + __u32 fence; /* out (or in with MSM_SUBMIT_FENCE_SN_IN flag) */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ __u64 bos; /* in, ptr to array of submit_bo's */ -- cgit v1.2.3 From cd87fecdedd7b54c61f5d7c2b7237b43126ac50a Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Wed, 2 Mar 2022 22:52:34 -0300 Subject: net: dsa: tag_rtl8_4: add rtl8_4t trailing variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Realtek switches supports the same tag both before ethertype or between payload and the CRC. Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 71cc363dbbd4..759479fe8573 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -52,6 +52,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 +#define DSA_TAG_PROTO_RTL8_4T_VALUE 25 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -79,6 +80,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, + DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, }; struct dsa_switch; -- cgit v1.2.3 From 5c26f6ac9416b63d093e29c30e79b3297e425472 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 4 Mar 2022 20:28:51 -0800 Subject: mm: refactor vm_area_struct::anon_vma_name usage code Avoid mixing strings and their anon_vma_name referenced pointers by using struct anon_vma_name whenever possible. This simplifies the code and allows easier sharing of anon_vma_name structures when they represent the same name. [surenb@google.com: fix comment] Link: https://lkml.kernel.org/r/20220223153613.835563-1-surenb@google.com Link: https://lkml.kernel.org/r/20220224231834.1481408-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Matthew Wilcox Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Colin Cross Cc: Sumit Semwal Cc: Dave Hansen Cc: Kees Cook Cc: "Kirill A. 
Shutemov" Cc: Vlastimil Babka Cc: Johannes Weiner Cc: "Eric W. Biederman" Cc: Christian Brauner Cc: Alexey Gladkov Cc: Sasha Levin Cc: Chris Hyser Cc: Davidlohr Bueso Cc: Peter Collingbourne Cc: Xiaofeng Cao Cc: David Hildenbrand Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 ++-- include/linux/mm_inline.h | 87 ++++++++++++++++++++++++++++++++--------------- include/linux/mm_types.h | 5 ++- 3 files changed, 67 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 213cc569b192..5744a3fc4716 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2626,7 +2626,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx, const char *); + struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); @@ -3372,11 +3372,12 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name); + unsigned long len_in, + struct anon_vma_name *anon_name); #else static inline int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name) { + unsigned long len_in, struct anon_vma_name *anon_name) { return 0; } #endif diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index b725839dfe71..dd3accaa4e6d 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -140,50 +140,81 @@ static __always_inline void del_page_from_lru_list(struct page *page, #ifdef CONFIG_ANON_VMA_NAME /* - * mmap_lock should be read-locked when calling vma_anon_name() and while using - * the returned pointer. + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. */ -extern const char *vma_anon_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name_alloc(const char *name); +extern void anon_vma_name_free(struct kref *kref); -/* - * mmap_lock should be read-locked for orig_vma->vm_mm. - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be - * isolated. - */ -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma); +/* mmap_lock should be read-locked */ +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_get(&anon_name->kref); +} -/* - * mmap_lock should be write-locked or vma should have been isolated under - * write-locked mmap_lock protection. 
- */ -extern void free_vma_anon_name(struct vm_area_struct *vma); +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_put(&anon_name->kref, anon_vma_name_free); +} -/* mmap_lock should be read-locked */ -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) +{ + struct anon_vma_name *anon_name = anon_vma_name(orig_vma); + + if (anon_name) { + anon_vma_name_get(anon_name); + new_vma->anon_name = anon_name; + } +} + +static inline void free_anon_vma_name(struct vm_area_struct *vma) { - const char *vma_name = vma_anon_name(vma); + /* + * Not using anon_vma_name because it generates a warning if mmap_lock + * is not held, which might be the case here. + */ + if (!vma->vm_file) + anon_vma_name_put(vma->anon_name); +} - /* either both NULL, or pointers to same string */ - if (vma_name == name) +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + if (anon_name1 == anon_name2) return true; - return name && vma_name && !strcmp(name, vma_name); + return anon_name1 && anon_name2 && + !strcmp(anon_name1->name, anon_name2->name); } + #else /* CONFIG_ANON_VMA_NAME */ -static inline const char *vma_anon_name(struct vm_area_struct *vma) +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { return NULL; } -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) {} -static inline void free_vma_anon_name(struct vm_area_struct *vma) {} -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + return NULL; +} + +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_anon_vma_name(struct vm_area_struct *vma) {} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) { return true; } + #endif /* CONFIG_ANON_VMA_NAME */ static inline void init_tlb_flush_pending(struct mm_struct *mm) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5140e5feb486..0f549870da6a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -416,7 +416,10 @@ struct vm_area_struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; - /* Serialized by mmap_sem. */ + /* + * Serialized by mmap_sem. Never use directly because it is + * valid only when vm_file is NULL. Use anon_vma_name instead. + */ struct anon_vma_name *anon_name; }; -- cgit v1.2.3 From 96403e11283def1d1c465c8279514c9a504d8630 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 4 Mar 2022 20:28:55 -0800 Subject: mm: prevent vm_area_struct::anon_name refcount saturation A deep process chain with many vmas could grow really high. With default sysctl_max_map_count (64k) and default pid_max (32k) the max number of vmas in the system is 2147450880 and the refcounter has headroom of 1073774592 before it reaches REFCOUNT_SATURATED (3221225472). Therefore it's unlikely that an anonymous name refcounter will overflow with these defaults. 
Currently the max for pid_max is PID_MAX_LIMIT (4194304) and for sysctl_max_map_count it's INT_MAX (2147483647). In this configuration anon_vma_name refcount overflow becomes theoretically possible (that still require heavy sharing of that anon_vma_name between processes). kref refcounting interface used in anon_vma_name structure will detect a counter overflow when it reaches REFCOUNT_SATURATED value but will only generate a warning and freeze the ref counter. This would lead to the refcounted object never being freed. A determined attacker could leak memory like that but it would be rather expensive and inefficient way to do so. To ensure anon_vma_name refcount does not overflow, stop anon_vma_name sharing when the refcount reaches REFCOUNT_MAX (2147483647), which still leaves INT_MAX/2 (1073741823) values before the counter reaches REFCOUNT_SATURATED. This should provide enough headroom for raising the refcounts temporarily. Link: https://lkml.kernel.org/r/20220223153613.835563-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Alexey Gladkov Cc: Chris Hyser Cc: Christian Brauner Cc: Colin Cross Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: "Eric W. Biederman" Cc: Johannes Weiner Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Peter Collingbourne Cc: Sasha Levin Cc: Sumit Semwal Cc: Vlastimil Babka Cc: Xiaofeng Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index dd3accaa4e6d..cf90b1fa2c60 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -161,15 +161,25 @@ static inline void anon_vma_name_put(struct anon_vma_name *anon_name) kref_put(&anon_name->kref, anon_vma_name_free); } +static inline +struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name) +{ + /* Prevent anon_name refcount saturation early on */ + if (kref_read(&anon_name->kref) < REFCOUNT_MAX) { + anon_vma_name_get(anon_name); + return anon_name; + + } + return anon_vma_name_alloc(anon_name->name); +} + static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct *new_vma) { struct anon_vma_name *anon_name = anon_vma_name(orig_vma); - if (anon_name) { - anon_vma_name_get(anon_name); - new_vma->anon_name = anon_name; - } + if (anon_name) + new_vma->anon_name = anon_vma_name_reuse(anon_name); } static inline void free_anon_vma_name(struct vm_area_struct *vma) -- cgit v1.2.3 From 25b35dd28138f61f9a0fb8b76c0483761fd228bd Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 5 Mar 2022 04:16:38 +0530 Subject: bpf: Add check_func_arg_reg_off function Lift the list of register types allowed for having fixed and variable offsets when passed as helper function arguments into a common helper, so that they can be reused for kfunc checks in later commits. Keeping a common helper aids maintainability and allows us to follow the same consistent rules across helpers and kfuncs. Also, convert check_func_arg to use this function. 
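Illustratively (an editor's sketch rather than code from the patch), check_func_arg() and the later kfunc argument checks are then expected to funnel through the helper in the same way:

    err = check_func_arg_reg_off(env, reg, regno, arg_type);
    if (err)
            return err;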
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220304224645.3677453-2-memxor@gmail.com --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7a7be8c057f2..38b24ee8d8c2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -521,6 +521,9 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + enum bpf_arg_type arg_type); int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, -- cgit v1.2.3 From 24d5bb806c7e2c0b9972564fd493069f612d90dd Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 5 Mar 2022 04:16:41 +0530 Subject: bpf: Harden register offset checks for release helpers and kfuncs Let's ensure that the PTR_TO_BTF_ID reg being passed in to release BPF helpers and kfuncs always has its offset set to 0. While not a real problem now, there's a very real possibility this will become a problem when more and more kfuncs are exposed, and more BPF helpers are added which can release PTR_TO_BTF_ID. Previous commits already protected against non-zero var_off. One of the case we are concerned about now is when we have a type that can be returned by e.g. an acquire kfunc: struct foo { int a; int b; struct bar b; }; ... and struct bar is also a type that can be returned by another acquire kfunc. Then, doing the following sequence: struct foo *f = bpf_get_foo(); // acquire kfunc if (!f) return 0; bpf_put_bar(&f->b); // release kfunc ... would work with the current code, since the btf_struct_ids_match takes reg->off into account for matching pointer type with release kfunc argument type, but would obviously be incorrect, and most likely lead to a kernel crash. A test has been included later to prevent regressions in this area. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220304224645.3677453-5-memxor@gmail.com --- include/linux/bpf_verifier.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 38b24ee8d8c2..c1fc4af47f69 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -523,7 +523,8 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type); + enum bpf_arg_type arg_type, + bool is_release_func); int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, -- cgit v1.2.3 From f014a00bbeb09cea16017b82448d32a468a6b96f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 5 Mar 2022 04:16:42 +0530 Subject: compiler-clang.h: Add __diag infrastructure for clang Add __diag macros similar to those in compiler-gcc.h, so that warnings that need to be adjusted for specific cases but not globally can be ignored when building with clang. 
Signed-off-by: Nathan Chancellor Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220304224645.3677453-6-memxor@gmail.com [ Kartikeya: wrote commit message ] --- include/linux/compiler-clang.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 3c4de9b6c6e3..f1aa41d520bd 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -68,3 +68,25 @@ #define __nocfi __attribute__((__no_sanitize__("cfi"))) #define __cficanonical __attribute__((__cfi_canonical_jump_table__)) + +/* + * Turn individual warnings and errors on and off locally, depending + * on version. + */ +#define __diag_clang(version, severity, s) \ + __diag_clang_ ## version(__diag_clang_ ## severity s) + +/* Severity used in pragma directives */ +#define __diag_clang_ignore ignored +#define __diag_clang_warn warning +#define __diag_clang_error error + +#define __diag_str1(s) #s +#define __diag_str(s) __diag_str1(s) +#define __diag(s) _Pragma(__diag_str(clang diagnostic s)) + +#if CONFIG_CLANG_VERSION >= 110000 +#define __diag_clang_11(s) __diag(s) +#else +#define __diag_clang_11(s) +#endif -- cgit v1.2.3 From 4d1ea705d797e66edd70ffa708b83888a210a437 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 5 Mar 2022 04:16:43 +0530 Subject: compiler_types.h: Add unified __diag_ignore_all for GCC/LLVM Add a __diag_ignore_all macro, to ignore warnings for both GCC and LLVM, without having to specify the compiler type and version. By default, GCC 8 and clang 11 are used. This will be used by bpf subsystem to ignore -Wmissing-prototypes warning for functions that are meant to be global functions so that they are in vmlinux BTF, but don't have a prototype. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220304224645.3677453-7-memxor@gmail.com --- include/linux/compiler-clang.h | 3 +++ include/linux/compiler-gcc.h | 3 +++ include/linux/compiler_types.h | 4 ++++ 3 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index f1aa41d520bd..babb1347148c 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -90,3 +90,6 @@ #else #define __diag_clang_11(s) #endif + +#define __diag_ignore_all(option, comment) \ + __diag_clang(11, ignore, option) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index ccbbd31b3aae..d364c98a4a80 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -151,6 +151,9 @@ #define __diag_GCC_8(s) #endif +#define __diag_ignore_all(option, comment) \ + __diag_GCC(8, ignore, option) + /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" * attribute) do not work, and must be disabled. 
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3f31ff400432..8e5d2f50f951 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -371,4 +371,8 @@ struct ftrace_likely_data { #define __diag_error(compiler, version, option, comment) \ __diag_ ## compiler(version, error, option) +#ifndef __diag_ignore_all +#define __diag_ignore_all(option, comment) +#endif + #endif /* __LINUX_COMPILER_TYPES_H */ -- cgit v1.2.3 From 9216c916237805c93d054ed022afb172ddbc3ed1 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Fri, 4 Mar 2022 11:16:55 -0800 Subject: compiler_types: Define __percpu as __attribute__((btf_type_tag("percpu"))) This is similar to commit 7472d5a642c9 ("compiler_types: define __user as __attribute__((btf_type_tag("user")))"), where a type tag "user" was introduced to identify the pointers that point to user memory. With that change, the newest compile toolchain can encode __user information into vmlinux BTF, which can be used by the BPF verifier to enforce safe program behaviors. Similarly, we have __percpu attribute, which is mainly used to indicate memory is allocated in percpu region. The __percpu pointers in kernel are supposed to be used together with functions like per_cpu_ptr() and this_cpu_ptr(), which perform necessary calculation on the pointer's base address. Without the btf_type_tag introduced in this patch, __percpu pointers will be treated as regular memory pointers in vmlinux BTF and BPF programs are allowed to directly dereference them, generating incorrect behaviors. Now with "percpu" btf_type_tag, the BPF verifier is able to differentiate __percpu pointers from regular pointers and forbids unexpected behaviors like direct load. The following is an example similar to the one given in commit 7472d5a642c9: [$ ~] cat test.c #define __percpu __attribute__((btf_type_tag("percpu"))) int foo(int __percpu *arg) { return *arg; } [$ ~] clang -O2 -g -c test.c [$ ~] pahole -JV test.o ... File test.o: [1] INT int size=4 nr_bits=32 encoding=SIGNED [2] TYPE_TAG percpu type_id=1 [3] PTR (anon) type_id=2 [4] FUNC_PROTO (anon) return=1 args=(3 arg) [5] FUNC foo type_id=4 [$ ~] for the function argument "int __percpu *arg", its type is described as PTR -> TYPE_TAG(percpu) -> INT The kernel can use this information for bpf verification or other use cases. Like commit 7472d5a642c9, this feature requires clang (>= clang14) and pahole (>= 1.23). Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220304191657.981240-3-haoluo@google.com --- include/linux/compiler_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 8e5d2f50f951..b9a8ae9440c7 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -38,7 +38,12 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __user # endif # define __iomem -# define __percpu +# if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define __percpu __attribute__((btf_type_tag("percpu"))) +# else +# define __percpu +# endif # define __rcu # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 -- cgit v1.2.3 From 5844101a1be9b8636024cb31c865ef13c7cc6db3 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Fri, 4 Mar 2022 11:16:56 -0800 Subject: bpf: Reject programs that try to load __percpu memory. 
With the introduction of the btf_type_tag "percpu", we can add a MEM_PERCPU to identify those pointers that point to percpu memory. The ability of differetiating percpu pointers from regular memory pointers have two benefits: 1. It forbids unexpected use of percpu pointers, such as direct loads. In kernel, there are special functions used for accessing percpu memory. Directly loading percpu memory is meaningless. We already have BPF helpers like bpf_per_cpu_ptr() and bpf_this_cpu_ptr() that wrap the kernel percpu functions. So we can now convert percpu pointers into regular pointers in a safe way. 2. Previously, bpf_per_cpu_ptr() and bpf_this_cpu_ptr() only work on PTR_TO_PERCPU_BTF_ID, a special reg_type which describes static percpu variables in kernel (we rely on pahole to encode them into vmlinux BTF). Now, since we can identify __percpu tagged pointers, we can also identify dynamically allocated percpu memory as well. It means we can use bpf_xxx_cpu_ptr() on dynamic percpu memory. This would be very convenient when accessing fields like "cgroup->rstat_cpu". Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220304191657.981240-4-haoluo@google.com --- include/linux/bpf.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f19abc59b6cd..88449fbbe063 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -334,7 +334,15 @@ enum bpf_type_flag { /* MEM is in user address space. */ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_USER, + /* MEM is a percpu memory. MEM_PERCPU tags PTR_TO_BTF_ID. When tagged + * with MEM_PERCPU, PTR_TO_BTF_ID _cannot_ be directly accessed. In + * order to drop this tag, it must be passed into bpf_per_cpu_ptr() + * or bpf_this_cpu_ptr(), which will return the pointer corresponding + * to the specified cpu. + */ + MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_PERCPU, }; /* Max number of base types. */ @@ -516,7 +524,6 @@ enum bpf_reg_type { */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ - PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ __BPF_REG_TYPE_MAX, -- cgit v1.2.3 From 736f16de75f9bb32d76f652cb66f04d1bc685057 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 4 Mar 2022 06:55:05 -0800 Subject: net: tap: track dropped skb via kfree_skb_reason() The TAP can be used as vhost-net backend. E.g., the tap_handle_frame() is the interface to forward the skb from TAP to vhost-net/virtio-net. However, there are many "goto drop" in the TAP driver. Therefore, the kfree_skb_reason() is involved at each "goto drop" to help userspace ftrace/ebpf to track the reason for the loss of packets. The below reasons are introduced: - SKB_DROP_REASON_SKB_CSUM - SKB_DROP_REASON_SKB_GSO_SEG - SKB_DROP_REASON_SKB_UCOPY_FAULT - SKB_DROP_REASON_DEV_HDR - SKB_DROP_REASON_FULL_RING Cc: Joao Martins Cc: Joe Jin Signed-off-by: Dongli Zhang Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 13 +++++++++++++ include/trace/events/skb.h | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2be263184d1e..67cfff4065b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -412,6 +412,19 @@ enum skb_drop_reason { * this means that L3 protocol is * not supported */ + SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation + * error + */ + SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from + * user space, e.g., via + * zerocopy_sg_from_iter() + * or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_DEV_HDR, /* device driver specific + * header/metadata is invalid + */ + SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index c0769d943f8e..240e7e7591fc 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -51,6 +51,11 @@ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ + EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ + EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ + EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ + EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ + EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 4b4f052e2d89c2eb7e13ee28ba9e85f8097aef3d Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 4 Mar 2022 06:55:07 -0800 Subject: net: tun: track dropped skb via kfree_skb_reason() The TUN can be used as vhost-net backend. E.g, the tun_net_xmit() is the interface to forward the skb from TUN to vhost-net/virtio-net. However, there are many "goto drop" in the TUN driver. Therefore, the kfree_skb_reason() is involved at each "goto drop" to help userspace ftrace/ebpf to track the reason for the loss of packets. The below reasons are introduced: - SKB_DROP_REASON_DEV_READY - SKB_DROP_REASON_NOMEM - SKB_DROP_REASON_HDR_TRUNC - SKB_DROP_REASON_TAP_FILTER - SKB_DROP_REASON_TAP_TXFILTER Cc: Joao Martins Cc: Joe Jin Signed-off-by: Dongli Zhang Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ include/trace/events/skb.h | 5 +++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 67cfff4065b6..34f572271c0c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -424,7 +424,25 @@ enum skb_drop_reason { SKB_DROP_REASON_DEV_HDR, /* device driver specific * header/metadata is invalid */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 240e7e7591fc..e1670e1e4934 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -55,7 +55,12 @@ EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ + EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ + EM(SKB_DROP_REASON_NOMEM, NOMEM) \ + EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ + EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ + EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 40b358f6156ac516e33e5252a8769737054da24e Mon Sep 17 00:00:00 2001 From: Shunzhou Jiang Date: Mon, 7 Mar 2022 10:53:56 +0800 Subject: dt-bindings: power: add Amlogic s4 power domains bindings Add the bindings for the Amlogic Secure power domains, controlling the secure power domains. The bindings targets the Amlogic s4, in which the power domains registers are in secure world. Signed-off-by: Shunzhou Jiang Reviewed-by: Martin Blumenstingl Acked-by: Rob Herring Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220307025357.1368673-2-shunzhou.jiang@amlogic.com --- include/dt-bindings/power/meson-s4-power.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/dt-bindings/power/meson-s4-power.h (limited to 'include') diff --git a/include/dt-bindings/power/meson-s4-power.h b/include/dt-bindings/power/meson-s4-power.h new file mode 100644 index 000000000000..462dd2cb938b --- /dev/null +++ b/include/dt-bindings/power/meson-s4-power.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ +/* + * Copyright (c) 2021 Amlogic, Inc. 
+ * Author: Shunzhou Jiang + */ + +#ifndef _DT_BINDINGS_MESON_S4_POWER_H +#define _DT_BINDINGS_MESON_S4_POWER_H + +#define PWRC_S4_DOS_HEVC_ID 0 +#define PWRC_S4_DOS_VDEC_ID 1 +#define PWRC_S4_VPU_HDMI_ID 2 +#define PWRC_S4_USB_COMB_ID 3 +#define PWRC_S4_GE2D_ID 4 +#define PWRC_S4_ETH_ID 5 +#define PWRC_S4_DEMOD_ID 6 +#define PWRC_S4_AUDIO_ID 7 + +#endif -- cgit v1.2.3 From 6b1775f26a2da2b05a6dc8ec2b5d14e9a4701a1a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: xen/grant-table: add gnttab_try_end_foreign_access() Add a new grant table function gnttab_try_end_foreign_access(), which will remove and free a grant if it is not in use. Its main use case is to either free a grant if it is no longer in use, or to take some other action if it is still in use. This other action can be an error exit, or (e.g. in the case of blkfront persistent grant feature) some special handling. This is CVE-2022-23036, CVE-2022-23038 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - new patch V4: - add comments to header (Jan Beulich) --- include/xen/grant_table.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index cb854df031ce..358d2817741b 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -104,10 +104,22 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. + */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); -- cgit v1.2.3 From 1dbd11ca75fe664d3e54607547771d021f531f59 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: xen: remove gnttab_query_foreign_access() Remove gnttab_query_foreign_access(), as it is unused and unsafe to use. All previous use cases assumed a grant would not be in use after gnttab_query_foreign_access() returned 0. This information is useless in best case, as it only refers to a situation in the past, which could have changed already. 
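To illustrate the intended usage pattern of the new helper, here is a minimal sketch (editorial, not part of the patch; the foo_front_info structure and the single-page ring are assumptions): a frontend teardown path frees its ring page only when the grant is confirmed to be unused, and takes the "other action" otherwise.

/* Sketch only: foo_front_info and its single ring grant are hypothetical. */
static void foo_free_ring(struct foo_front_info *info)
{
	if (gnttab_try_end_foreign_access(info->ring_ref)) {
		/* Grant was freed, so the backend can no longer reach the page. */
		free_page((unsigned long)info->ring);
		info->ring = NULL;
	} else {
		/* Still granted: defer the free or fail the teardown instead. */
		pr_warn("foo-front: ring grant %u still in use\n", info->ring_ref);
	}
}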
Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- include/xen/grant_table.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 358d2817741b..ab9e692a0ef4 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -125,8 +125,6 @@ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ -- cgit v1.2.3 From 42baefac638f06314298087394b982ead9ec444b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: xen/gnttab: fix gnttab_end_foreign_access() without page specified gnttab_end_foreign_access() is used to free a grant reference and optionally to free the associated page. In case the grant is still in use by the other side processing is being deferred. This leads to a problem in case no page to be freed is specified by the caller: the caller doesn't know that the page is still mapped by the other side and thus should not be used for other purposes. The correct way to handle this situation is to take an additional reference to the granted page in case handling is being deferred and to drop that reference when the grant reference could be freed finally. This requires that there are no users of gnttab_end_foreign_access() left directly repurposing the granted page after the call, as this might result in clobbered data or information leaks via the not yet freed grant reference. This is part of CVE-2022-23041 / XSA-396. Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V4: - expand comment in header V5: - get page ref in case of kmalloc() failure, too --- include/xen/grant_table.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index ab9e692a0ef4..c9fea9389ebe 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -107,7 +107,12 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * Note that the granted page might still be accessed (read or write) by the * other side after gnttab_end_foreign_access() returns, so even if page was * specified as 0 it is not allowed to just reuse the page for other - * purposes immediately. + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); -- cgit v1.2.3 From 96ba61ee5331eb6e2f4c2baeb994b9ceb01d8266 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Feb 2022 21:46:16 +0100 Subject: media: v4l2-ctrls: Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags that are needed by NVIDIA Tegra video decoder. Userspace will have to set these flags in accordance to the type of a decoded frame. 
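As a rough illustration of that userspace duty (a hypothetical helper, not code from the patch; the slice-type handling is an assumption based on the H.264 slice_type values), a stateless decoder client would set exactly one of the new flags per decoded frame:

/* Sketch only: how a userspace client might tag the decode parameters. */
static void foo_set_frame_type(struct v4l2_ctrl_h264_decode_params *dec,
			       unsigned int slice_type)
{
	dec->flags &= ~(V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
			V4L2_H264_DECODE_PARAM_FLAG_BFRAME);

	if (slice_type == 0 || slice_type == 5)		/* P slices */
		dec->flags |= V4L2_H264_DECODE_PARAM_FLAG_PFRAME;
	else if (slice_type == 1 || slice_type == 6)	/* B slices */
		dec->flags |= V4L2_H264_DECODE_PARAM_FLAG_BFRAME;
}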
Reviewed-by: Nicolas Dufresne Signed-off-by: Dmitry Osipenko Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..e3d48d571062 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1563,6 +1563,8 @@ struct v4l2_h264_dpb_entry { #define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC 0x01 #define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC 0x02 #define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD 0x04 +#define V4L2_H264_DECODE_PARAM_FLAG_PFRAME 0x08 +#define V4L2_H264_DECODE_PARAM_FLAG_BFRAME 0x10 #define V4L2_CID_STATELESS_H264_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 7) /** -- cgit v1.2.3 From f72de02ebece2e962462bc0c1e9efd29eaa029b2 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Sat, 5 Mar 2022 12:21:25 +0100 Subject: ptp: Add generic PTP is_sync() function PHY drivers such as micrel or dp83640 need to analyze whether a given skb is a PTP sync message for one step functionality. In order to avoid code duplication introduce a generic function and move it to ptp classify. Signed-off-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 9afd34a2d36c..fefa7790dc46 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -126,6 +126,17 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, return msgtype; } +/** + * ptp_msg_is_sync - Evaluates whether the given skb is a PTP Sync message + * @skb: packet buffer + * @type: type of the packet (see ptp_classify_raw()) + * + * This function evaluates whether the given skb is a PTP Sync message. + * + * Return: true if sync message, false otherwise + */ +bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type); + void __init ptp_classifier_init(void); #else static inline void ptp_classifier_init(void) @@ -148,5 +159,9 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, */ return PTP_MSGTYPE_SYNC; } +static inline bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type) +{ + return false; +} #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 2655926aea9beea62c9ba80c032485456fd848f0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 6 Mar 2022 22:57:52 +0100 Subject: net: Remove netif_rx_any_context() and netif_rx_ni(). Remove netif_rx_any_context and netif_rx_ni() because there are no more users in tree. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 19a27ac361ef..29a850a8d460 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3718,16 +3718,6 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); -static inline int netif_rx_ni(struct sk_buff *skb) -{ - return netif_rx(skb); -} - -static inline int netif_rx_any_context(struct sk_buff *skb) -{ - return netif_rx(skb); -} - int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); -- cgit v1.2.3 From ebe48d368e97d007bfeb76fcb065d6cfc4c96645 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 Mar 2022 13:11:39 +0100 Subject: esp: Fix possible buffer overflow in ESP transformation The maximum message size that can be sent is bigger than the maximum size that skb_page_frag_refill can allocate. So it is possible to write beyond the allocated buffer. Fix this by falling back to COW in that case. v2: Avoid get_order() costs as suggested by Linus Torvalds. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Reported-by: valis Signed-off-by: Steffen Klassert --- include/net/esp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/esp.h b/include/net/esp.h index 9c5637d41d95..90cd02ff77ef 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -4,6 +4,8 @@ #include +#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER) + struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) -- cgit v1.2.3 From 23c7f8d7989e1646aac82f75761b7648c355cb8a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 Mar 2022 13:11:41 +0100 Subject: net: Fix esp GSO on inter address family tunnels. The esp tunnel GSO handlers use skb_mac_gso_segment to push the inner packet to the segmentation handlers. However, skb_mac_gso_segment takes the Ethernet Protocol ID from 'skb->protocol' which is wrong for inter address family tunnels. We fix this by introducing a new skb_eth_gso_segment function. This function can be used if it is necessary to pass the Ethernet Protocol ID directly to the segmentation handler. First users of this function will be the esp4 and esp6 tunnel segmentation handlers.
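A hedged sketch of such a caller, based only on the prototype added below (the function name and the IPv6-over-IPv4 tunnel choice are illustrative, not taken from the esp4/esp6 patches):

/* Sketch only: segment with an explicit inner protocol type. */
static struct sk_buff *foo_tunnel_gso_segment(struct sk_buff *skb,
					      netdev_features_t features)
{
	/* skb->protocol says ETH_P_IP here, but the inner packet is IPv6. */
	return skb_eth_gso_segment(skb, features, htons(ETH_P_IPV6));
}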
Fixes: c35fe4106b92 ("xfrm: Add mode handlers for IPsec on layer 2") Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b5a314db167..f53ea7038441 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4602,6 +4602,8 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type); struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); -- cgit v1.2.3 From 4aaa06b227f737da5c10feb93a6b203920d5a1e7 Mon Sep 17 00:00:00 2001 From: Curtis Malainey Date: Fri, 4 Mar 2022 14:57:31 -0600 Subject: ASoC: SOF: fix 32 signed bit overflow Shifting in a signed 32bit container past the signed bit is technically undefined behaviour. Fix by using unsigned types. Found via cppcheck. Reviewed-by: Ranjani Sridharan Reviewed-by: Liam Girdwood Signed-off-by: Curtis Malainey Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220304205733.62233-9-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/header.h | 2 +- include/uapi/sound/sof/abi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h index b97a76bcb655..b22e925c70e2 100644 --- a/include/sound/sof/header.h +++ b/include/sound/sof/header.h @@ -31,7 +31,7 @@ /* Global Message - Generic */ #define SOF_GLB_TYPE_SHIFT 28 -#define SOF_GLB_TYPE_MASK (0xfL << SOF_GLB_TYPE_SHIFT) +#define SOF_GLB_TYPE_MASK (0xfUL << SOF_GLB_TYPE_SHIFT) #define SOF_GLB_TYPE(x) ((x) << SOF_GLB_TYPE_SHIFT) /* Command Message - Generic */ diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h index f4232d289a22..e052653a6e4c 100644 --- a/include/uapi/sound/sof/abi.h +++ b/include/uapi/sound/sof/abi.h @@ -27,7 +27,7 @@ /* SOF ABI version major, minor and patch numbers */ #define SOF_ABI_MAJOR 3 #define SOF_ABI_MINOR 19 -#define SOF_ABI_PATCH 0 +#define SOF_ABI_PATCH 1 /* SOF ABI version number. Format within 32bit word is MMmmmppp */ #define SOF_ABI_MAJOR_SHIFT 24 -- cgit v1.2.3 From 1e974e5b82b3d75069b50445cd248cee0199654e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 28 Feb 2022 17:27:54 +0000 Subject: ASoC: audio_graph_card2: Add support for variable slot widths Some audio hardware cannot support the same slot width for all sample widths, or a slot width equal to the sample width for all sample widths. This is usually due either to limitations of the audio serial port or system clocking restrictions. A typical example would be: - 16-bit samples in 16-bit slots - 24-bit samples in 32-bit slots The new dai-tdm-slot-width-map property allows setting a mapping of sample widths and the corresponding tdm slot widths and slot counts. Although the slot count is usually the same for all cases this does allow for adding padding slots to maintain the same bitclk frequency. The property is added to each endpoint node that needs the component DAI to be told the TDM slot width and count. 
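For a concrete, made-up configuration (a sketch, not taken from the patch), the parsed property would end up as an asoc_simple_tdm_width_map table along these lines: 16-bit samples in 16-bit slots, 24-bit and 32-bit samples padded into 32-bit slots, two slots in every case.

/* Sketch only: an illustrative mapping built from the new struct. */
static struct asoc_simple_tdm_width_map foo_tdm_widths[] = {
	{ .sample_bits = 16, .slot_count = 2, .slot_width = 16 },
	{ .sample_bits = 24, .slot_count = 2, .slot_width = 32 },
	{ .sample_bits = 32, .slot_count = 2, .slot_width = 32 },
};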
Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20220228172754.453783-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 5ee269c59aac..8faa649f712b 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -16,6 +16,12 @@ #define asoc_simple_init_mic(card, sjack, prefix) \ asoc_simple_init_jack(card, sjack, 0, prefix, NULL) +struct asoc_simple_tdm_width_map { + u8 sample_bits; + u8 slot_count; + u16 slot_width; +}; + struct asoc_simple_dai { const char *name; unsigned int sysclk; @@ -26,6 +32,8 @@ struct asoc_simple_dai { unsigned int rx_slot_mask; struct clk *clk; bool clk_fixed; + struct asoc_simple_tdm_width_map *tdm_width_map; + int n_tdm_widths; }; struct asoc_simple_data { @@ -132,6 +140,9 @@ int asoc_simple_parse_daifmt(struct device *dev, struct device_node *codec, char *prefix, unsigned int *retfmt); +int asoc_simple_parse_tdm_width_map(struct device *dev, struct device_node *np, + struct asoc_simple_dai *dai); + __printf(3, 4) int asoc_simple_set_dailink_name(struct device *dev, struct snd_soc_dai_link *dai_link, -- cgit v1.2.3 From 97939610b893de068c82c347d06319cd231a4602 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2022 19:01:05 +0100 Subject: block: remove bio_devname All callers are gone, so remove this wrapper. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220304180105.409765-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7523aba4ddf7..4c21f6e69e18 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -491,8 +491,6 @@ static inline void bio_release_pages(struct bio *bio, bool mark_dirty) __bio_release_pages(bio, mark_dirty); } -extern const char *bio_devname(struct bio *bio, char *buffer); - #define bio_dev(bio) \ disk_devt((bio)->bi_bdev->bd_disk) -- cgit v1.2.3 From a26d84633c2ba3c41fd7692986fb2231c9228c4e Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 23 Feb 2022 18:59:02 +0100 Subject: rtc: max77686: Rename day-of-month defines RTC_DATE and REG_RTC_DATE are used for the registers holding the day of month. Rename these constants to mean what they mean. 
Signed-off-by: Luca Ceresoli Reviewed-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Signed-off-by: Lee Jones --- include/linux/mfd/max77686-private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index b1482b3cf353..3acceeedbaba 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -152,7 +152,7 @@ enum max77686_rtc_reg { MAX77686_RTC_WEEKDAY = 0x0A, MAX77686_RTC_MONTH = 0x0B, MAX77686_RTC_YEAR = 0x0C, - MAX77686_RTC_DATE = 0x0D, + MAX77686_RTC_MONTHDAY = 0x0D, MAX77686_ALARM1_SEC = 0x0E, MAX77686_ALARM1_MIN = 0x0F, MAX77686_ALARM1_HOUR = 0x10, @@ -352,7 +352,7 @@ enum max77802_rtc_reg { MAX77802_RTC_WEEKDAY = 0xCA, MAX77802_RTC_MONTH = 0xCB, MAX77802_RTC_YEAR = 0xCC, - MAX77802_RTC_DATE = 0xCD, + MAX77802_RTC_MONTHDAY = 0xCD, MAX77802_RTC_AE1 = 0xCE, MAX77802_ALARM1_SEC = 0xCF, MAX77802_ALARM1_MIN = 0xD0, -- cgit v1.2.3 From 60b050ff3a60273d56b4017d0f45d5ca71aae5ad Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 23 Feb 2022 18:59:05 +0100 Subject: mfd: max77714: Add driver for Maxim MAX77714 PMIC Add a simple driver for the Maxim MAX77714 PMIC, supporting RTC and watchdog only. Signed-off-by: Luca Ceresoli Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max77714.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/linux/mfd/max77714.h (limited to 'include') diff --git a/include/linux/mfd/max77714.h b/include/linux/mfd/max77714.h new file mode 100644 index 000000000000..a970dc455426 --- /dev/null +++ b/include/linux/mfd/max77714.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Maxim MAX77714 Register and data structures definition. + * + * Copyright (C) 2022 Luca Ceresoli + * Author: Luca Ceresoli + */ + +#ifndef __LINUX_MFD_MAX77714_H_ +#define __LINUX_MFD_MAX77714_H_ + +#include + +#define MAX77714_INT_TOP 0x00 +#define MAX77714_INT_TOPM 0x07 /* Datasheet says "read only", but it is RW */ + +#define MAX77714_INT_TOP_ONOFF BIT(1) +#define MAX77714_INT_TOP_RTC BIT(3) +#define MAX77714_INT_TOP_GPIO BIT(4) +#define MAX77714_INT_TOP_LDO BIT(5) +#define MAX77714_INT_TOP_SD BIT(6) +#define MAX77714_INT_TOP_GLBL BIT(7) + +#define MAX77714_32K_STATUS 0x30 +#define MAX77714_32K_STATUS_SIOSCOK BIT(5) +#define MAX77714_32K_STATUS_XOSCOK BIT(4) +#define MAX77714_32K_STATUS_32KSOURCE BIT(3) +#define MAX77714_32K_STATUS_32KLOAD_MSK 0x3 +#define MAX77714_32K_STATUS_32KLOAD_SHF 1 +#define MAX77714_32K_STATUS_CRYSTAL_CFG BIT(0) + +#define MAX77714_32K_CONFIG 0x31 +#define MAX77714_32K_CONFIG_XOSC_RETRY BIT(4) + +#define MAX77714_CNFG_GLBL2 0x91 +#define MAX77714_WDTEN BIT(2) +#define MAX77714_WDTSLPC BIT(3) +#define MAX77714_TWD_MASK 0x3 +#define MAX77714_TWD_2s 0x0 +#define MAX77714_TWD_16s 0x1 +#define MAX77714_TWD_64s 0x2 +#define MAX77714_TWD_128s 0x3 + +#define MAX77714_CNFG_GLBL3 0x92 +#define MAX77714_WDTC BIT(0) + +#define MAX77714_CNFG2_ONOFF 0x94 +#define MAX77714_WD_RST_WK BIT(5) + +/* Interrupts */ +enum { + MAX77714_IRQ_TOP_ONOFF, + MAX77714_IRQ_TOP_RTC, /* Real-time clock */ + MAX77714_IRQ_TOP_GPIO, /* GPIOs */ + MAX77714_IRQ_TOP_LDO, /* Low-dropout regulators */ + MAX77714_IRQ_TOP_SD, /* Step-down regulators */ + MAX77714_IRQ_TOP_GLBL, /* "Global resources": Low-Battery, overtemp... 
*/ +}; + +#endif /* __LINUX_MFD_MAX77714_H_ */ -- cgit v1.2.3 From c47383f849097c2b3547e28365578cd9e5811378 Mon Sep 17 00:00:00 2001 From: Johnson Wang Date: Thu, 6 Jan 2022 14:54:04 +0800 Subject: mfd: Add support for the MediaTek MT6366 PMIC This adds support for the MediaTek MT6366 PMIC. This is a multifunction device with the following sub modules: - Regulator - RTC - Codec - Interrupt It is interfaced to the host controller using SPI interface by a proprietary hardware called PMIC wrapper or pwrap. MT6366 MFD is a child device of the pwrap. Signed-off-by: Johnson Wang Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220106065407.16036-2-johnson.wang@mediatek.com --- include/linux/mfd/mt6358/registers.h | 7 +++++++ include/linux/mfd/mt6397/core.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 201139b12140..3d33517f178c 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -94,6 +94,10 @@ #define MT6358_BUCK_VCORE_CON0 0x1488 #define MT6358_BUCK_VCORE_DBG0 0x149e #define MT6358_BUCK_VCORE_DBG1 0x14a0 +#define MT6358_BUCK_VCORE_SSHUB_CON0 0x14a4 +#define MT6358_BUCK_VCORE_SSHUB_CON1 0x14a6 +#define MT6358_BUCK_VCORE_SSHUB_ELR0 MT6358_BUCK_VCORE_SSHUB_CON1 +#define MT6358_BUCK_VCORE_SSHUB_DBG1 MT6358_BUCK_VCORE_DBG1 #define MT6358_BUCK_VCORE_ELR0 0x14aa #define MT6358_BUCK_VGPU_CON0 0x1508 #define MT6358_BUCK_VGPU_DBG0 0x151e @@ -169,6 +173,9 @@ #define MT6358_LDO_VSRAM_OTHERS_CON0 0x1ba6 #define MT6358_LDO_VSRAM_OTHERS_DBG0 0x1bc0 #define MT6358_LDO_VSRAM_OTHERS_DBG1 0x1bc2 +#define MT6358_LDO_VSRAM_OTHERS_SSHUB_CON0 0x1bc4 +#define MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1 0x1bc6 +#define MT6358_LDO_VSRAM_OTHERS_SSHUB_DBG1 MT6358_LDO_VSRAM_OTHERS_DBG1 #define MT6358_LDO_VSRAM_GPU_CON0 0x1bc8 #define MT6358_LDO_VSRAM_GPU_DBG0 0x1be2 #define MT6358_LDO_VSRAM_GPU_DBG1 0x1be4 diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 56f210eebc54..1cf78726503b 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -14,6 +14,7 @@ enum chip_id { MT6323_CHIP_ID = 0x23, MT6358_CHIP_ID = 0x58, MT6359_CHIP_ID = 0x59, + MT6366_CHIP_ID = 0x66, MT6391_CHIP_ID = 0x91, MT6397_CHIP_ID = 0x97, }; -- cgit v1.2.3 From 1092347165cf5ed1453c1f211641a859818a2828 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 9 Feb 2022 17:03:12 +0100 Subject: media: lirc: remove unused feature LIRC_CAN_SET_REC_DUTY_CYCLE There is no hardware which can filter input on the duty cycle, so no driver implements this. On top of that, LIRC_CAN_SET_REC_DUTY_CYCLE has the same value as LIRC_CAN_MEASURE_CARRIER (0x02000000). 
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 21c69a6a100d..23b0f2c8ba81 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -73,7 +73,6 @@ #define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -- cgit v1.2.3 From 72a74c8f0a0df12c7d7ea012aa70d95152858dea Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Thu, 24 Feb 2022 11:10:00 +0800 Subject: media: add nv12m_8l128 and nv12m_10be_8l128 video format. nv12m_8l128 is an 8-bit tiled NV12 format used by the amphion decoder. nv12m_10be_8l128 is a 10-bit tiled format used by the amphion decoder. The tile size is 8x128. Signed-off-by: Ming Qian Signed-off-by: Shijie Qin Signed-off-by: Zhou Peng Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index df8b9c486ba1..3768a0a80830 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -632,6 +632,8 @@ struct v4l2_pix_format { /* Tiled YUV formats, non contiguous planes */ #define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ #define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ +#define V4L2_PIX_FMT_NV12M_8L128 v4l2_fourcc('N', 'A', '1', '2') /* Y/CbCr 4:2:0 8x128 tiles */ +#define V4L2_PIX_FMT_NV12M_10BE_8L128 v4l2_fourcc_be('N', 'T', '1', '2') /* Y/CbCr 4:2:0 10-bit 8x128 tiles */ /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ -- cgit v1.2.3 From 8c4b810a87005eb46564a48a69b5b255e515fa62 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Feb 2022 17:05:02 +0000 Subject: clocksource/drivers/arm_arch_timer: Use event stream scaling when available With FEAT_ECV and the 1GHz counter, it is pretty likely that the event stream divider doesn't fit in the field that holds the divider value (we only have 4 bits to describe counter bits [15:0]). Thankfully, FEAT_ECV also provides a scaling mechanism to switch the field to cover counter bits [23:8] instead. Enable this on arm64 when ECV is available (32bit doesn't have any detection infrastructure and is unlikely to be run on an ARMv8.6 system anyway).
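A hedged sketch of what enabling the scaled mode amounts to (this is not the in-tree implementation; the divider value and the surrounding helper calls are assumptions):

/* Sketch only: program CNTKCTL so the 4-bit trigger field selects a bit
 * in counter bits [23:8] rather than [15:0], keeping a ~100us event
 * stream period reachable with a 1GHz counter.
 */
u32 cntkctl = arch_timer_get_cntkctl();

cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;		/* EVNTIS */
cntkctl |= divider << ARCH_TIMER_EVT_TRIGGER_SHIFT;	/* assumed divider */
arch_timer_set_cntkctl(cntkctl);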
Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: Daniel Lezcano Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220203170502.2694422-1-maz@kernel.org Signed-off-by: Daniel Lezcano --- include/clocksource/arm_arch_timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e715bdb720d5..057c8964aefb 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -56,6 +56,7 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT) #define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ #define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ +#define ARCH_TIMER_EVT_INTERVAL_SCALE (1 << 17) /* EVNTIS in the ARMv8 ARM */ #define ARCH_TIMER_EVT_STREAM_PERIOD_US 100 #define ARCH_TIMER_EVT_STREAM_FREQ \ -- cgit v1.2.3 From aa6f8dcbab473f3a3c7454b74caa46d36cdc5d13 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Sat, 5 Mar 2022 18:07:14 +0100 Subject: swiotlb: rework "fix info leak with DMA_FROM_DEVICE" Unfortunately, we ended up merging an old version of the patch "fix info leak with DMA_FROM_DEVICE" instead of merging the latest one. Christoph (the swiotlb maintainer) asked me to create an incremental fix (after I pointed out the mix-up and asked him for guidance). So here we go. The main differences between what we got and what was agreed are: * swiotlb_sync_single_for_device is also required to do an extra bounce * We decided not to introduce DMA_ATTR_OVERWRITE until we have exploiters * The implementation of DMA_ATTR_OVERWRITE is flawed: DMA_ATTR_OVERWRITE must take precedence over DMA_ATTR_SKIP_CPU_SYNC Thus this patch removes DMA_ATTR_OVERWRITE, and makes swiotlb_sync_single_for_device() bounce unconditionally (that is, also when dir == DMA_TO_DEVICE) in order to avoid synchronising back stale data from the swiotlb buffer. Let me note that if the size used with the dma_sync_* API is less than the size used with dma_[un]map_*, under certain circumstances we may still end up with swiotlb not being transparent. In that sense, this is no perfect fix either. To make this bulletproof, we would have to bounce the entire mapping/bounce buffer. For that we would have to figure out the starting address and the size of the mapping in swiotlb_sync_single_for_device(). While this does seem possible, there seems to be no firm consensus on how things are supposed to work. Signed-off-by: Halil Pasic Fixes: ddbd89deb7d3 ("swiotlb: fix info leak with DMA_FROM_DEVICE") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6150d11a607e..dca2b1355bb1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,14 +61,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target.
It is specific to a -- cgit v1.2.3 From c75e707fe1aab32f1dc8e09845533b6542d9aaa9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2022 18:55:56 +0100 Subject: block: remove the per-bio/request write hint With the NVMe support for this gone, there are no consumers of these hints left, so remove them. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220304175556.407719-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - include/linux/blkdev.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5561e58d158a..ba8cfa57255f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -250,7 +250,6 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; - unsigned short bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a12c031af887..226836ccd060 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -518,9 +518,6 @@ struct request_queue { bool mq_sysfs_init_done; -#define BLK_MAX_WRITE_HINTS 5 - u64 write_hints[BLK_MAX_WRITE_HINTS]; - /* * Independent sector access ranges. This is always NULL for * devices that do not have multiple independent access ranges. -- cgit v1.2.3 From c340b990d58c856c1636e0c10abb9e4351ad852a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:05 -0800 Subject: block: support pi with extended metadata The nvme spec allows protection information formats with metadata extending beyond the pi field. Use the actual size of the metadata field for incrementing the buffer. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220303201312.3255347-2-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 8a038ea0717e..378b2459efe2 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -19,6 +19,7 @@ struct blk_integrity_iter { sector_t seed; unsigned int data_size; unsigned short interval; + unsigned char tuple_size; const char *disk_name; }; -- cgit v1.2.3 From c2ea5fcf53d5f21e6aff0de11d55bc202822df6a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:07 -0800 Subject: asm-generic: introduce be48 unaligned accessors The NVMe protocol extended the data integrity fields with unaligned 48-bit reference tags. Provide some helper accessors in preparation for these. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Arnd Bergmann Signed-off-by: Keith Busch Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220303201312.3255347-4-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/asm-generic/unaligned.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 1c4242416c9f..8fc637379899 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -126,4 +126,30 @@ static inline void put_unaligned_le24(const u32 val, void *p) __put_unaligned_le24(val, p); } +static inline void __put_unaligned_be48(const u64 val, __u8 *p) +{ + *p++ = val >> 40; + *p++ = val >> 32; + *p++ = val >> 24; + *p++ = val >> 16; + *p++ = val >> 8; + *p++ = val; +} + +static inline void put_unaligned_be48(const u64 val, void *p) +{ + __put_unaligned_be48(val, p); +} + +static inline u64 __get_unaligned_be48(const u8 *p) +{ + return (u64)p[0] << 40 | (u64)p[1] << 32 | p[2] << 24 | + p[3] << 16 | p[4] << 8 | p[5]; +} + +static inline u64 get_unaligned_be48(const void *p) +{ + return __get_unaligned_be48(p); +} + #endif /* __ASM_GENERIC_UNALIGNED_H */ -- cgit v1.2.3 From 7ee8809df990d1de379002973baee1681e8d7dd3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:08 -0800 Subject: linux/kernel: introduce lower_48_bits function Recent data integrity field enhancements allow reference tags to be up to 48 bits. Introduce an inline helper function since this will be a repeated operation. Suggested-by: Bart Van Assche Signed-off-by: Keith Busch Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220303201312.3255347-5-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 33f47a996513..3877478681b9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -63,6 +63,15 @@ } \ ) +/** + * lower_48_bits() - return bits 0-47 of a number + * @n: the number we're accessing + */ +static inline u64 lower_48_bits(u64 n) +{ + return n & ((1ull << 48) - 1); +} + /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing -- cgit v1.2.3 From cbc0a40e17da361a2ada8d669413ccfbd2028f2d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:09 -0800 Subject: lib: add rocksoft model crc64 The NVM Express specification extended data integrity fields to 64 bits using the Rocksoft parameters. Add the poly to the crc64 table generation, and provide a generic library routine implementing the algorithm. The Rocksoft 64-bit CRC model parameters are as follows: Poly: 0xAD93D23594C93659 Initial value: 0xFFFFFFFFFFFFFFFF Reflected Input: True Reflected Output: True Xor Final: 0xFFFFFFFFFFFFFFFF Since this model used reflected bits, the implementation generates the reflected table so the result is ordered consistently. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Martin K. Petersen Signed-off-by: Keith Busch Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220303201312.3255347-6-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/crc64.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/crc64.h b/include/linux/crc64.h index c756e65a1b58..9480f38cc7cf 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -8,4 +8,6 @@ #include u64 __pure crc64_be(u64 crc, const void *p, size_t len); +u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); + #endif /* _LINUX_CRC64_H */ -- cgit v1.2.3 From f3813f4b287e480b1fcd62ca798d8556644b8278 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:10 -0800 Subject: crypto: add rocksoft 64b crc guard tag framework Hardware specific features may be able to calculate a crc64, so provide a framework for drivers to register their implementation. If nothing is registered, fallback to the generic table lookup implementation. The implementation is modeled after the crct10dif equivalent. Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220303201312.3255347-7-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/crc64.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/crc64.h b/include/linux/crc64.h index 9480f38cc7cf..e044c60d1e61 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -7,7 +7,12 @@ #include +#define CRC64_ROCKSOFT_STRING "crc64-rocksoft" + u64 __pure crc64_be(u64 crc, const void *p, size_t len); u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); +u64 crc64_rocksoft(const unsigned char *buffer, size_t len); +u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); + #endif /* _LINUX_CRC64_H */ -- cgit v1.2.3 From a7d4383f17e10f338ea757a849f02298790d24fb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:11 -0800 Subject: block: add pi for extended integrity The NVMe specification defines new data integrity formats beyond the t10 tuple. Add support for the specification defined CRC64 formats, assuming the reference tag does not need to be split with the "storage tag". Cc: Hannes Reinecke Cc: "Martin K. Petersen" Signed-off-by: Keith Busch Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220303201312.3255347-8-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index c635c2e014e3..a4b1af581f69 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -53,4 +53,24 @@ extern const struct blk_integrity_profile t10_pi_type1_ip; extern const struct blk_integrity_profile t10_pi_type3_crc; extern const struct blk_integrity_profile t10_pi_type3_ip; +struct crc64_pi_tuple { + __be64 guard_tag; + __be16 app_tag; + __u8 ref_tag[6]; +}; + +static inline u64 ext_pi_ref_tag(struct request *rq) +{ + unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + +#ifdef CONFIG_BLK_DEV_INTEGRITY + if (rq->q->integrity.interval_exp) + shift = rq->q->integrity.interval_exp; +#endif + return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); +} + +extern const struct blk_integrity_profile ext_pi_type1_crc64; +extern const struct blk_integrity_profile ext_pi_type3_crc64; + #endif -- cgit v1.2.3 From 4020aad85c6785ddac8d51f345ff9e3328ce773a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Mar 2022 12:13:12 -0800 Subject: nvme: add support for enhanced metadata NVM Express ratified TP 4068 defines new protection information formats. Implement support for the CRC64 guard tags. Since the block layer doesn't support variable length reference tags, driver support for the Storage Tag space is not supported at this time. Cc: Hannes Reinecke Cc: "Martin K. Petersen" Cc: Klaus Jensen Signed-off-by: Keith Busch Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220303201312.3255347-9-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/nvme.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..4f44f83817a9 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -244,6 +244,7 @@ enum { enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), NVME_CTRL_ATTR_TBKAS = (1 << 6), + NVME_CTRL_ATTR_ELBAS = (1 << 15), }; struct nvme_id_ctrl { @@ -399,8 +400,7 @@ struct nvme_id_ns { __le16 endgid; __u8 nguid[16]; __u8 eui64[8]; - struct nvme_lbaf lbaf[16]; - __u8 rsvd192[192]; + struct nvme_lbaf lbaf[64]; __u8 vs[3712]; }; @@ -418,8 +418,7 @@ struct nvme_id_ns_zns { __le32 rrl; __le32 frl; __u8 rsvd20[2796]; - struct nvme_zns_lbafe lbafe[16]; - __u8 rsvd3072[768]; + struct nvme_zns_lbafe lbafe[64]; __u8 vs[256]; }; @@ -428,6 +427,30 @@ struct nvme_id_ctrl_zns { __u8 rsvd1[4095]; }; +struct nvme_id_ns_nvm { + __le64 lbstm; + __u8 pic; + __u8 rsvd9[3]; + __le32 elbaf[64]; + __u8 rsvd268[3828]; +}; + +enum { + NVME_ID_NS_NVM_STS_MASK = 0x3f, + NVME_ID_NS_NVM_GUARD_SHIFT = 7, + NVME_ID_NS_NVM_GUARD_MASK = 0x3, +}; + +static inline __u8 nvme_elbaf_sts(__u32 elbaf) +{ + return elbaf & NVME_ID_NS_NVM_STS_MASK; +} + +static inline __u8 nvme_elbaf_guard_type(__u32 elbaf) +{ + return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK; +} + struct nvme_id_ctrl_nvm { __u8 vsl; __u8 wzsl; @@ -478,6 +501,8 @@ enum { NVME_NS_FEAT_IO_OPT = 1 << 4, NVME_NS_ATTR_RO = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_LBA_UMASK = 0x60, + NVME_NS_FLBAS_LBA_SHIFT = 1, NVME_NS_FLBAS_META_EXT = 0x10, NVME_NS_NMIC_SHARED = 1 << 0, NVME_LBAF_RP_BEST = 0, @@ -496,6 +521,18 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; 
+enum { + NVME_NVM_NS_16B_GUARD = 0, + NVME_NVM_NS_32B_GUARD = 1, + NVME_NVM_NS_64B_GUARD = 2, +}; + +static inline __u8 nvme_lbaf_index(__u8 flbas) +{ + return (flbas & NVME_NS_FLBAS_LBA_MASK) | + ((flbas & NVME_NS_FLBAS_LBA_UMASK) >> NVME_NS_FLBAS_LBA_SHIFT); +} + /* Identify Namespace Metadata Capabilities (MC): */ enum { NVME_MC_EXTENDED_LBA = (1 << 0), @@ -842,7 +879,8 @@ struct nvme_rw_command { __u8 flags; __u16 command_id; __le32 nsid; - __u64 rsvd2; + __le32 cdw2; + __le32 cdw3; __le64 metadata; union nvme_data_ptr dptr; __le64 slba; @@ -996,11 +1034,14 @@ enum { struct nvme_feat_host_behavior { __u8 acre; - __u8 resv1[511]; + __u8 etdas; + __u8 lbafee; + __u8 resv1[509]; }; enum { NVME_ENABLE_ACRE = 1, + NVME_ENABLE_LBAFEE = 1, }; /* Admin commands */ -- cgit v1.2.3 From 2984539959dbaf4e65e19bf90c2419304a81a985 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:33 +0100 Subject: tick/rcu: Remove obsolete rcu_needs_cpu() parameters With the removal of CONFIG_RCU_FAST_NO_HZ, the parameters in rcu_needs_cpu() are not necessary anymore. Simply remove them. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/rcutiny.h | 3 +-- include/linux/rcutree.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946..5fed476f977f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d669400..6cc91291d078 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* -- cgit v1.2.3 From 0345691b24c076655ce8f0f4bfd24cba3467ccbd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:34 +0100 Subject: tick/rcu: Stop allowing RCU_SOFTIRQ in idle RCU_SOFTIRQ used to be special in that it could be raised on purpose within the idle path to prevent from stopping the tick. Some code still prevents from unnecessary warnings related to this specific behaviour while entering in dynticks-idle mode. However the nohz layout has changed quite a bit in ten years, and the removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from sane places. A remaining corner case is admitted though when the vector is invoked in fragile hotplug path. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/interrupt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c..9613326d2f8a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,13 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * Ignoring the RCU vector after ksoftirqd is parked is fine + * because: + * 1) rcutree_migrate_callbacks() takes care of the queue. 
+ * 2) rcu_report_dead() reports the final quiescent states. + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. -- cgit v1.2.3 From f96272a90d9eaea9933aaab704ddbd258feb3841 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:35 +0100 Subject: lib/irq_poll: Declare IRQ_POLL softirq vector as ksoftirqd-parking safe The following warning may appear while setting a CPU down: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #20!!! The IRQ_POLL_SOFTIRQ vector can be raised during the hotplug cpu_down() path after ksoftirqd is parked and before the CPU actually dies. However this is handled afterward at the CPUHP_IRQ_POLL_DEAD stage where the queue gets migrated. Hence this warning can be considered spurious and the vector can join the "hotplug-safe" list. Reported-and-tested-by: Paul Menzel Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/interrupt.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9613326d2f8a..f40754caaefa 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -580,12 +580,15 @@ enum }; /* - * Ignoring the RCU vector after ksoftirqd is parked is fine - * because: - * 1) rcutree_migrate_callbacks() takes care of the queue. + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. -- cgit v1.2.3 From b0e846248de5bd57e4b785791f359ecb8f2d7311 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 27 Dec 2021 07:48:39 +0100 Subject: mfd: db8500-prcmu: Remove dead code for a non-existing config The config DBX500_PRCMU_QOS_POWER was never introduced in the kernel repository. So, the ifdef in ./include/linux/mfd/dbx500-prcmu.h was never effective. Remove these dead function prototypes. 
Signed-off-by: Lukas Bulwahn Reviewed-by: Linus Walleij Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20211227064839.21405-1-lukas.bulwahn@gmail.com --- include/linux/mfd/dbx500-prcmu.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index cbf9d7619493..2a255362b5ac 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -556,22 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits) #define PRCMU_QOS_ARM_OPP 3 #define PRCMU_QOS_DEFAULT_VALUE -1 -#ifdef CONFIG_DBX500_PRCMU_QOS_POWER - -unsigned long prcmu_qos_get_cpufreq_opp_delay(void); -void prcmu_qos_set_cpufreq_opp_delay(unsigned long); -void prcmu_qos_force_opp(int, s32); -int prcmu_qos_requirement(int pm_qos_class); -int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value); -int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value); -void prcmu_qos_remove_requirement(int pm_qos_class, char *name); -int prcmu_qos_add_notifier(int prcmu_qos_class, - struct notifier_block *notifier); -int prcmu_qos_remove_notifier(int prcmu_qos_class, - struct notifier_block *notifier); - -#else - static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) { return 0; @@ -613,6 +597,4 @@ static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, return 0; } -#endif - #endif /* __MACH_PRCMU_H */ -- cgit v1.2.3 From 56f216d8efbc1212bf5ff8a6ff5e29927965e8db Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Tue, 8 Feb 2022 14:40:23 -0500 Subject: mfd: rk808: Add reboot support to rk808.c This adds reboot support to the rk808 pmic driver and enables it for the rk809 and rk817 devices. This only enables if the rockchip,system-power-controller flag is set. Signed-off-by: Peter Geis Signed-off-by: Frank Wunderlich Reviewed-by: Dmitry Osipenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220208194023.929720-1-pgwipeout@gmail.com --- include/linux/mfd/rk808.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index a96e6d43ca06..58602032e642 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -373,6 +373,7 @@ enum rk805_reg { #define SWITCH2_EN BIT(6) #define SWITCH1_EN BIT(5) #define DEV_OFF_RST BIT(3) +#define DEV_RST BIT(2) #define DEV_OFF BIT(0) #define RTC_STOP BIT(0) -- cgit v1.2.3 From 68fa55f0e05ce371c4b5de7932d9f570d61bf791 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 11 Feb 2022 10:23:46 +0530 Subject: perf/marvell: cn10k DDR perf event core ownership As DDR perf event counters are not per core, so they should be accessed only by one core at a time. Select new core when previously owning core is going offline. 
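A rough sketch of the ownership hand-off this enables (names, fields and the exact callback wiring are assumptions, not the driver code); the callback would be registered for the new CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE state:

/* Sketch only: migrate counter ownership when the owning CPU goes down. */
static int foo_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct foo_ddr_pmu *pmu = hlist_entry_safe(node, struct foo_ddr_pmu, node);
	unsigned int target;

	if (cpu != pmu->cpu)
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
	pmu->cpu = target;
	return 0;
}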
Signed-off-by: Bharat Bhushan Reviewed-by: Bhaskara Budiredla Link: https://lore.kernel.org/r/20220211045346.17894-5-bbhushan2@marvell.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..2bc550ac8dc7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -231,6 +231,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, -- cgit v1.2.3 From 56edb6c25f11f25df153f4804f2d5bced2b49a9e Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Tue, 25 Jan 2022 21:58:30 -0500 Subject: KEYS: store reference to machine keyring Expose the .machine keyring created in integrity code by adding a reference. Store a reference to the machine keyring in system keyring code. The system keyring code needs this to complete the keyring link to the machine keyring. Signed-off-by: Eric Snowberg Reviewed-by: Jarkko Sakkinen Tested-by: Mimi Zohar Signed-off-by: Jarkko Sakkinen --- include/keys/system_keyring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 6acd3cf13a18..98c9b10cdc17 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -38,6 +38,14 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +extern void __init set_machine_trusted_keys(struct key *keyring); +#else +static inline void __init set_machine_trusted_keys(struct key *keyring) +{ +} +#endif + extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); -- cgit v1.2.3 From 087aa4ed379054951cb3c8ccaa0c4dbafd903c01 Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Tue, 25 Jan 2022 21:58:31 -0500 Subject: KEYS: Introduce link restriction for machine keys Introduce a new link restriction that includes the trusted builtin, secondary and machine keys. The restriction is based on the key to be added being vouched for by a key in any of these three keyrings. With the introduction of the machine keyring, the end-user may choose to trust Machine Owner Keys (MOK) within the kernel. If they have chosen to trust them, the .machine keyring will contain these keys. If not, the machine keyring will always be empty. Update the restriction check to allow the secondary trusted keyring to also trust machine keys. Allow the .machine keyring to be linked to the secondary_trusted_keys. After the link is created, keys contained in the .machine keyring will automatically be searched when searching secondary_trusted_keys. 
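A minimal sketch of how the secondary keyring can pick up the new restriction (illustrative only; the helper name and error handling are assumptions rather than a quote of the system keyring code):

/* Sketch only: choose the link restriction based on machine keyring support. */
static __init struct key_restriction *foo_secondary_restriction(void)
{
	struct key_restriction *restriction;

	restriction = kzalloc(sizeof(*restriction), GFP_KERNEL);
	if (!restriction)
		panic("Can't allocate secondary trusted keyring restriction\n");

	if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
		restriction->check = restrict_link_by_builtin_secondary_and_machine;
	else
		restriction->check = restrict_link_by_builtin_and_secondary_trusted;

	return restriction;
}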
Suggested-by: Mimi Zohar Signed-off-by: Eric Snowberg Reviewed-by: Jarkko Sakkinen Tested-by: Mimi Zohar Signed-off-by: Jarkko Sakkinen --- include/keys/system_keyring.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 98c9b10cdc17..2419a735420f 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -39,8 +39,14 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #endif #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +extern int restrict_link_by_builtin_secondary_and_machine( + struct key *dest_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *restrict_key); extern void __init set_machine_trusted_keys(struct key *keyring); #else +#define restrict_link_by_builtin_secondary_and_machine restrict_link_by_builtin_trusted static inline void __init set_machine_trusted_keys(struct key *keyring) { } -- cgit v1.2.3 From d3cff4a95ed78ca192fc4bbb2743d13b7a6cc555 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 11:56:55 -0800 Subject: KEYS: remove support for asym_tpm keys asym_tpm keys are tied to TPM v1.2, which uses outdated crypto and has been deprecated in favor of TPM v2.0 for over 7 years. A very quick look at this code also immediately found some memory safety bugs (https://lore.kernel.org/r/20220113235440.90439-2-ebiggers@kernel.org). Note that this code is reachable by unprivileged users. According to Jarkko (one of the keyrings subsystem maintainers), this code has no practical use cases, and he isn't willing to maintain it (https://lore.kernel.org/r/YfFZPbKkgYJGWu1Q@iki.fi). Therefore, let's remove it. Note that this feature didn't have any documentation or tests, so we don't need to worry about removing those. Cc: David Howells Cc: Denis Kenzior Cc: James Morris Cc: Jarkko Sakkinen Cc: Marcel Holtmann Signed-off-by: Eric Biggers Reviewed-by: Jarkko Sakkinen Acked-by: Ard Biesheuvel Signed-off-by: Jarkko Sakkinen --- include/crypto/asym_tpm_subtype.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/crypto/asym_tpm_subtype.h (limited to 'include') diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h deleted file mode 100644 index 48198c36d6b9..000000000000 --- a/include/crypto/asym_tpm_subtype.h +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _LINUX_ASYM_TPM_SUBTYPE_H -#define _LINUX_ASYM_TPM_SUBTYPE_H - -#include - -struct tpm_key { - void *blob; - u32 blob_len; - uint16_t key_len; /* Size in bits of the key */ - const void *pub_key; /* pointer inside blob to the public key bytes */ - uint16_t pub_key_len; /* length of the public key */ -}; - -struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len); - -extern struct asymmetric_key_subtype asym_tpm_subtype; - -#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */ -- cgit v1.2.3 From 64807c2321512f67959e51f09e302c145c336184 Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Mon, 7 Mar 2022 21:45:14 +0530 Subject: net: phy: exported the genphy_read_master_slave function genphy_read_master_slave function allows to configure the master/slave for gigabit phys only. In order to use this function irrespective of speed, moved the speed check to the genphy_read_status call. 
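A hedged sketch of a PHY driver making use of the now-exported helper (the driver is hypothetical and the surrounding link handling is only indicated):

/* Sketch only: a driver read_status that reports master/slave state
 * even when the negotiated link is not gigabit.
 */
static int foo_read_status(struct phy_device *phydev)
{
	int ret;

	ret = genphy_read_master_slave(phydev);
	if (ret < 0)
		return ret;

	/* driver-specific link/speed/duplex handling would follow here */
	return genphy_read_status_fixed(phydev);
}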
Signed-off-by: Arun Ramadoss Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index cd08cf1a8b0d..20beeaa7443b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1578,6 +1578,7 @@ int genphy_update_link(struct phy_device *phydev); int genphy_read_lpa(struct phy_device *phydev); int genphy_read_status_fixed(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); +int genphy_read_master_slave(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_loopback(struct phy_device *phydev, bool enable); -- cgit v1.2.3 From 1280f12f56a15abde23503ba876343e5f201c9c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Feb 2022 18:56:03 +0000 Subject: drivers/perf: arm_pmu: Handle 47 bit counters The current ARM PMU framework can only deal with 32 or 64bit counters. Teach it about a 47bit flavour. Yes, this is odd. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2512e2f9cd4e..0407a38b470a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -26,6 +26,8 @@ */ /* Event uses a 64bit counter */ #define ARMPMU_EVT_64BIT 1 +/* Event uses a 47bit counter */ +#define ARMPMU_EVT_47BIT 2 #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x -- cgit v1.2.3 From 015044e9610c8523794ea6cb55d5388bc00ba96a Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Mon, 7 Mar 2022 14:21:58 +0200 Subject: ASoC: dt-bindings: Document Microchip's PDMC Add DT bindings for the new Microchip PDMC embedded in sama7g5 SoCs. Signed-off-by: Codrin Ciubotariu Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220307122202.2251639-3-codrin.ciubotariu@microchip.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/microchip,pdmc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/sound/microchip,pdmc.h (limited to 'include') diff --git a/include/dt-bindings/sound/microchip,pdmc.h b/include/dt-bindings/sound/microchip,pdmc.h new file mode 100644 index 000000000000..96cde94ce74f --- /dev/null +++ b/include/dt-bindings/sound/microchip,pdmc.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DT_BINDINGS_MICROCHIP_PDMC_H__ +#define __DT_BINDINGS_MICROCHIP_PDMC_H__ + +/* PDM microphone's pin placement */ +#define MCHP_PDMC_DS0 0 +#define MCHP_PDMC_DS1 1 + +/* PDM microphone clock edge sampling */ +#define MCHP_PDMC_CLK_POSITIVE 0 +#define MCHP_PDMC_CLK_NEGATIVE 1 + +#endif /* __DT_BINDINGS_MICROCHIP_PDMC_H__ */ -- cgit v1.2.3 From a8749a35c39903120ec421ef2525acc8e0daa55c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2022 04:47:22 -0500 Subject: mm: vmalloc: introduce array allocation functions Linux has dozens of occurrences of vmalloc(array_size()) and vzalloc(array_size()). Allow to simplify the code by providing vmalloc_array and vcalloc, as well as the underscored variants that let the caller specify the GFP flags. 
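A minimal sketch of the intended conversion (illustrative; the variable names are made up):

	/* before */
	entries = vzalloc(array_size(nr_entries, sizeof(*entries)));

	/* after: multiplication overflow checking happens inside the helper */
	entries = vcalloc(nr_entries, sizeof(*entries));
	if (!entries)
		return -ENOMEM;

	/* ... use entries ... */
	vfree(entries);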
Acked-by: Michal Hocko Signed-off-by: Paolo Bonzini --- include/linux/vmalloc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 880227b9f044..d1bbd4fd50c5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -159,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); void *vmalloc_no_huge(unsigned long size) __alloc_size(1); +extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); +extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From ee0a4dc9f317fb9a97f20037d219802ca8de939b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Mar 2022 17:28:38 +0100 Subject: Revert "netfilter: conntrack: tag conntracks picked up in local out hook" This was a prerequisite for the ill-fated "netfilter: nat: force port remap to prevent shadowing well-known ports". As this has been reverted, this change can be backed out too. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8731d5bcb47d..b08b70989d2c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -97,7 +97,6 @@ struct nf_conn { unsigned long status; u16 cpu; - u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) -- cgit v1.2.3 From a99a3e2efaf1f4454eb5c9176f47e66de075b134 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 31 Jan 2022 11:50:46 -0600 Subject: coredump: Move definition of struct coredump_params into coredump.h Move the definition of struct coredump_params into coredump.h where it belongs. Remove the slightly errorneous comment explaining why struct coredump_params was declared in binfmts.h. Signed-off-by: "Eric W. Biederman" --- include/linux/binfmts.h | 13 +------------ include/linux/coredump.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 5d651c219c99..3dc20c4f394c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -8,6 +8,7 @@ #include struct filename; +struct coredump_params; #define CORENAME_MAX_SIZE 128 @@ -77,18 +78,6 @@ struct linux_binprm { #define BINPRM_FLAGS_PRESERVE_ARGV0_BIT 3 #define BINPRM_FLAGS_PRESERVE_ARGV0 (1 << BINPRM_FLAGS_PRESERVE_ARGV0_BIT) -/* Function parameter for binfmt->coredump */ -struct coredump_params { - const kernel_siginfo_t *siginfo; - struct pt_regs *regs; - struct file *file; - unsigned long limit; - unsigned long mm_flags; - loff_t written; - loff_t pos; - loff_t to_skip; -}; - /* * This structure defines the functions that are used to load the binary formats that * linux accepts. 
diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 248a68c668b4..2ee1460a1d66 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -14,11 +14,21 @@ struct core_vma_metadata { unsigned long dump_size; }; +struct coredump_params { + const kernel_siginfo_t *siginfo; + struct pt_regs *regs; + struct file *file; + unsigned long limit; + unsigned long mm_flags; + loff_t written; + loff_t pos; + loff_t to_skip; +}; + /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. */ -struct coredump_params; extern void dump_skip_to(struct coredump_params *cprm, unsigned long to); extern void dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); -- cgit v1.2.3 From 95c5436a4883841588dae86fb0b9325f47ba5ad3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 8 Mar 2022 12:55:29 -0600 Subject: coredump: Snapshot the vmas in do_coredump Move the call of dump_vma_snapshot and kvfree(vma_meta) out of the individual coredump routines into do_coredump itself. This makes the code less error prone and easier to maintain. Make the vma snapshot available to the coredump routines in struct coredump_params. This makes it easier to change and update what is captures in the vma snapshot and will be needed for fixing fill_file_notes. Reviewed-by: Jann Horn Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/coredump.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 2ee1460a1d66..7d05370e555e 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -23,6 +23,9 @@ struct coredump_params { loff_t written; loff_t pos; loff_t to_skip; + int vma_count; + size_t vma_data_size; + struct core_vma_metadata *vma_meta; }; /* @@ -35,9 +38,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr); extern void do_coredump(const kernel_siginfo_t *siginfo); #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} -- cgit v1.2.3 From a759de6991b35ad437adba32b5f0cb2fd9e75929 Mon Sep 17 00:00:00 2001 From: Youngjin Jang Date: Tue, 8 Mar 2022 04:07:39 +0900 Subject: PM: sleep: Add device name to suspend_report_result() Currently, suspend_report_result() prints only function information. If any driver uses a common PM function, nobody knows who exactly called the failing function. A device pinter is needed to recognize the failing device. For example: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0 PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0 become after the change: serial 00:05: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0 pci 0000:00:01.3: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0 Signed-off-by: Youngjin Jang [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index f7d2be686359..e65b3ab28377 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state); extern int dpm_suspend(pm_message_t state); extern int dpm_prepare(pm_message_t state); -extern void __suspend_report_result(const char *function, void *fn, int ret); +extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret); -#define suspend_report_result(fn, ret) \ +#define suspend_report_result(dev, fn, ret) \ do { \ - __suspend_report_result(__func__, fn, ret); \ + __suspend_report_result(__func__, dev, fn, ret); \ } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); @@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state) return 0; } -#define suspend_report_result(fn, ret) do {} while (0) +#define suspend_report_result(dev, fn, ret) do {} while (0) static inline int device_pm_wait_for_dev(struct device *a, struct device *b) { -- cgit v1.2.3 From 390031c942116d4733310f0684beb8db19885fe6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 8 Mar 2022 13:04:19 -0600 Subject: coredump: Use the vma snapshot in fill_files_note Matthew Wilcox reported that there is a missing mmap_lock in file_files_note that could possibly lead to a user after free. Solve this by using the existing vma snapshot for consistency and to avoid the need to take the mmap_lock anywhere in the coredump code except for dump_vma_snapshot. Update the dump_vma_snapshot to capture vm_pgoff and vm_file that are neeeded by fill_files_note. Add free_vma_snapshot to free the captured values of vm_file. Reported-by: Matthew Wilcox Link: https://lkml.kernel.org/r/20220131153740.2396974-1-willy@infradead.org Cc: stable@vger.kernel.org Fixes: a07279c9a8cd ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot") Fixes: 2aa362c49c31 ("coredump: extend core dump note section to contain file names of mapped files") Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/coredump.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 7d05370e555e..08a1d3e7e46d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -12,6 +12,8 @@ struct core_vma_metadata { unsigned long start, end; unsigned long flags; unsigned long dump_size; + unsigned long pgoff; + struct file *file; }; struct coredump_params { -- cgit v1.2.3 From c57bef0287dd5deeabaea5727950559fb9037cd9 Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 6 Jan 2022 12:20:40 -0500 Subject: prlimit: make do_prlimit() static There are no other callers in the kernel. Fixed up a comment format and whitespace issue when moving do_prlimit() higher in sys.c. Signed-off-by: Barret Rhoden Link: https://lkml.kernel.org/r/20220106172041.522167-3-brho@google.com Signed-off-by: Eric W. 
Biederman --- include/linux/resource.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/resource.h b/include/linux/resource.h index bdf491cbcab7..4fdbc0c3f315 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -8,7 +8,5 @@ struct task_struct; void getrusage(struct task_struct *p, int who, struct rusage *ru); -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim); #endif -- cgit v1.2.3 From 18c91bb2d87268d23868bf13508f5bc9cf04e89a Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 6 Jan 2022 12:20:41 -0500 Subject: prlimit: do not grab the tasklist_lock Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. The tasklist_lock was grabbed to protect tsk->sighand from disappearing (becoming NULL). tsk->signal was already protected by holding a reference to tsk. update_rlimit_cpu() assumed tsk->sighand != NULL. With this commit, it attempts to lock_task_sighand(). However, this means that update_rlimit_cpu() can fail. This only happens when a task is exiting. Note that during exec, sighand may *change*, but it will not be NULL. Prior to this commit, the do_prlimit() ensured that update_rlimit_cpu() would not fail by read locking the tasklist_lock and checking tsk->sighand != NULL. If update_rlimit_cpu() fails, there may be other tasks that are not exiting that share tsk->signal. However, the group_leader is the last task to be released, so if we cannot update_rlimit_cpu(group_leader), then the entire process is exiting. The only other caller of update_rlimit_cpu() is selinux_bprm_committing_creds(). It has tsk == current, so update_rlimit_cpu() cannot fail (current->sighand cannot disappear until current exits). This change resulted in a 14% speedup on a microbenchmark where parents kill and wait on their children, and children getpriority, setpriority, and getrlimit. Signed-off-by: Barret Rhoden Link: https://lkml.kernel.org/r/20220106172041.522167-4-brho@google.com Signed-off-by: Eric W. Biederman --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..9cf126c3b27f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -253,7 +253,7 @@ void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, u64 *newval, u64 *oldval); -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct kernel_siginfo *info); #endif -- cgit v1.2.3 From 6914b82f37215ef510f9867bec14d5150fd970e5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 22 Feb 2022 02:15:28 +0100 Subject: dt-bindings: clock: add QCOM SM6350 display clock bindings Add device tree bindings for display clock controller for Qualcomm Technology Inc's SM6350 SoC. 
Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220222011534.3502-1-konrad.dybcio@somainline.org --- include/dt-bindings/clock/qcom,dispcc-sm6350.h | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,dispcc-sm6350.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,dispcc-sm6350.h b/include/dt-bindings/clock/qcom,dispcc-sm6350.h new file mode 100644 index 000000000000..cb54aae2723e --- /dev/null +++ b/include/dt-bindings/clock/qcom,dispcc-sm6350.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Konrad Dybcio + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_SM6350_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_SM6350_H + +/* DISP_CC clocks */ +#define DISP_CC_PLL0 0 +#define DISP_CC_MDSS_AHB_CLK 1 +#define DISP_CC_MDSS_AHB_CLK_SRC 2 +#define DISP_CC_MDSS_BYTE0_CLK 3 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define DISP_CC_MDSS_DP_AUX_CLK 7 +#define DISP_CC_MDSS_DP_AUX_CLK_SRC 8 +#define DISP_CC_MDSS_DP_CRYPTO_CLK 9 +#define DISP_CC_MDSS_DP_CRYPTO_CLK_SRC 10 +#define DISP_CC_MDSS_DP_LINK_CLK 11 +#define DISP_CC_MDSS_DP_LINK_CLK_SRC 12 +#define DISP_CC_MDSS_DP_LINK_DIV_CLK_SRC 13 +#define DISP_CC_MDSS_DP_LINK_INTF_CLK 14 +#define DISP_CC_MDSS_DP_PIXEL_CLK 15 +#define DISP_CC_MDSS_DP_PIXEL_CLK_SRC 16 +#define DISP_CC_MDSS_ESC0_CLK 17 +#define DISP_CC_MDSS_ESC0_CLK_SRC 18 +#define DISP_CC_MDSS_MDP_CLK 19 +#define DISP_CC_MDSS_MDP_CLK_SRC 20 +#define DISP_CC_MDSS_MDP_LUT_CLK 21 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 22 +#define DISP_CC_MDSS_PCLK0_CLK 23 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 24 +#define DISP_CC_MDSS_ROT_CLK 25 +#define DISP_CC_MDSS_ROT_CLK_SRC 26 +#define DISP_CC_MDSS_RSCC_AHB_CLK 27 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 28 +#define DISP_CC_MDSS_VSYNC_CLK 29 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 30 +#define DISP_CC_SLEEP_CLK 31 +#define DISP_CC_XO_CLK 32 + +/* GDSCs */ +#define MDSS_GDSC 0 + +#endif -- cgit v1.2.3 From 7b91b9d8cc6c374cf41f772dd7191ea48011621f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 22 Feb 2022 02:15:30 +0100 Subject: dt-bindings: clock: add SM6350 QCOM Graphics clock bindings Add device tree bindings for graphics clock controller for Qualcomm Technology Inc's SM6350 SoCs. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220222011534.3502-3-konrad.dybcio@somainline.org --- include/dt-bindings/clock/qcom,gpucc-sm6350.h | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,gpucc-sm6350.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gpucc-sm6350.h b/include/dt-bindings/clock/qcom,gpucc-sm6350.h new file mode 100644 index 000000000000..68e814fc8acd --- /dev/null +++ b/include/dt-bindings/clock/qcom,gpucc-sm6350.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2021, Konrad Dybcio + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM6350_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM6350_H + +/* GPU_CC clocks */ +#define GPU_CC_PLL0 0 +#define GPU_CC_PLL1 1 +#define GPU_CC_ACD_AHB_CLK 2 +#define GPU_CC_ACD_CXO_CLK 3 +#define GPU_CC_AHB_CLK 4 +#define GPU_CC_CRC_AHB_CLK 5 +#define GPU_CC_CX_GFX3D_CLK 6 +#define GPU_CC_CX_GFX3D_SLV_CLK 7 +#define GPU_CC_CX_GMU_CLK 8 +#define GPU_CC_CX_SNOC_DVM_CLK 9 +#define GPU_CC_CXO_AON_CLK 10 +#define GPU_CC_CXO_CLK 11 +#define GPU_CC_GMU_CLK_SRC 12 +#define GPU_CC_GX_CXO_CLK 13 +#define GPU_CC_GX_GFX3D_CLK 14 +#define GPU_CC_GX_GFX3D_CLK_SRC 15 +#define GPU_CC_GX_GMU_CLK 16 +#define GPU_CC_GX_VSENSE_CLK 17 + +/* CLK_HW */ +#define GPU_CC_CRC_DIV 0 + +/* GDSCs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif -- cgit v1.2.3 From b565d66403e3df303a058c0d8d00d0fc6aeb2ddc Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sat, 26 Feb 2022 14:52:31 +0100 Subject: dt-bindings: clock: add ipq8064 ce5 clk define Add ipq8064 ce5 clk define needed for CryptoEngine in gcc driver. Define CE5_SRC is not used so it's OK to change and we align it to the QSDK naming. Signed-off-by: Ansuel Smith Acked-by: Rob Herring Reviewed-by: Stephen Boyd Tested-by: Jonathan McDowell Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220226135235.10051-12-ansuelsmth@gmail.com --- include/dt-bindings/clock/qcom,gcc-ipq806x.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-ipq806x.h b/include/dt-bindings/clock/qcom,gcc-ipq806x.h index 7deec14a6dee..02262d2ac899 100644 --- a/include/dt-bindings/clock/qcom,gcc-ipq806x.h +++ b/include/dt-bindings/clock/qcom,gcc-ipq806x.h @@ -240,7 +240,7 @@ #define PLL14 232 #define PLL14_VOTE 233 #define PLL18 234 -#define CE5_SRC 235 +#define CE5_A_CLK 235 #define CE5_H_CLK 236 #define CE5_CORE_CLK 237 #define CE3_SLEEP_CLK 238 @@ -283,5 +283,8 @@ #define EBI2_AON_CLK 281 #define NSSTCM_CLK_SRC 282 #define NSSTCM_CLK 283 +#define CE5_A_CLK_SRC 285 +#define CE5_H_CLK_SRC 286 +#define CE5_CORE_CLK_SRC 287 #endif -- cgit v1.2.3 From 887646c47d5c6fc07d281f6f728270bf04331b69 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sat, 26 Feb 2022 14:52:33 +0100 Subject: dt-bindings: reset: add ipq8064 ce5 resets Add ipq8064 ce5 resets needed for CryptoEngine gcc driver. 
Signed-off-by: Ansuel Smith Reviewed-by: Bjorn Andersson Acked-by: Philipp Zabel Acked-by: Rob Herring Reviewed-by: Stephen Boyd Tested-by: Jonathan McDowell Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220226135235.10051-14-ansuelsmth@gmail.com --- include/dt-bindings/reset/qcom,gcc-ipq806x.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/reset/qcom,gcc-ipq806x.h b/include/dt-bindings/reset/qcom,gcc-ipq806x.h index 26b6f9200620..020c9cf18751 100644 --- a/include/dt-bindings/reset/qcom,gcc-ipq806x.h +++ b/include/dt-bindings/reset/qcom,gcc-ipq806x.h @@ -163,5 +163,10 @@ #define NSS_CAL_PRBS_RST_N_RESET 154 #define NSS_LCKDT_RST_N_RESET 155 #define NSS_SRDS_N_RESET 156 +#define CRYPTO_ENG1_RESET 157 +#define CRYPTO_ENG2_RESET 158 +#define CRYPTO_ENG3_RESET 159 +#define CRYPTO_ENG4_RESET 160 +#define CRYPTO_AHB_RESET 161 #endif -- cgit v1.2.3 From 5b2fa289c0d475bcae3a86fa04b81f1f678cc4d2 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 26 Feb 2022 22:41:24 +0100 Subject: dt-bindings: clock: qcom: rpmcc: Add RPM Modem SubSystem (MSS) clocks Add the missing clock definitions. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220226214126.21209-1-konrad.dybcio@somainline.org --- include/dt-bindings/clock/qcom,rpmcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h index fb624ff39273..015db95303d1 100644 --- a/include/dt-bindings/clock/qcom,rpmcc.h +++ b/include/dt-bindings/clock/qcom,rpmcc.h @@ -165,5 +165,7 @@ #define RPM_SMD_PKA_A_CLK 119 #define RPM_SMD_CPUSS_GNOC_CLK 120 #define RPM_SMD_CPUSS_GNOC_A_CLK 121 +#define RPM_SMD_MSS_CFG_AHB_CLK 122 +#define RPM_SMD_MSS_CFG_AHB_A_CLK 123 #endif -- cgit v1.2.3 From 41d36a9f3e5336f5b48c3adba0777b8e217020d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Mar 2022 07:05:28 +0100 Subject: fs: remove kiocb.ki_hint This field is entirely unused now except for a tracepoint in f2fs, so remove it. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220308060529.736277-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/fs.h | 12 ------------ include/trace/events/f2fs.h | 3 +-- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2d892b201b0..d5658ac5d8c6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -327,7 +327,6 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; - u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ struct wait_page_queue *ki_waitq; /* for async buffered IO */ randomized_struct_fields_end @@ -2225,21 +2224,11 @@ static inline enum rw_hint file_write_hint(struct file *file) static inline int iocb_flags(struct file *file); -static inline u16 ki_hint_validate(enum rw_hint hint) -{ - typeof(((struct kiocb *)0)->ki_hint) max_hint = -1; - - if (hint <= max_hint) - return hint; - return 0; -} - static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = iocb_flags(filp), - .ki_hint = ki_hint_validate(file_write_hint(filp)), .ki_ioprio = get_current_ioprio(), }; } @@ -2250,7 +2239,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = kiocb_src->ki_flags, - .ki_hint = kiocb_src->ki_hint, .ki_ioprio = kiocb_src->ki_ioprio, .ki_pos = kiocb_src->ki_pos, }; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f701bb23f83c..1779e133cea0 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -956,12 +956,11 @@ TRACE_EVENT(f2fs_direct_IO_enter, __entry->rw = rw; ), - TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d", + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_ioprio = %x rw = %d", show_dev_ino(__entry), __entry->iocb->ki_pos, __entry->len, __entry->iocb->ki_flags, - __entry->iocb->ki_hint, __entry->iocb->ki_ioprio, __entry->rw) ); -- cgit v1.2.3 From 7b12e49669c99f63bc12351c57e581f1f14d4adf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Mar 2022 07:05:29 +0100 Subject: fs: remove fs.f_write_hint The value is now completely unused except for reporting it back through the F_GET_FILE_RW_HINT ioctl, so remove the value and the two ioctls for it. Trying to use the F_SET_FILE_RW_HINT and F_GET_FILE_RW_HINT fcntls will now return EINVAL, just like it would on a kernel that never supported this functionality in the first place. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220308060529.736277-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/fs.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d5658ac5d8c6..a1fc3b41cd82 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -966,7 +966,6 @@ struct file { * Must not be taken from IRQ context. 
*/ spinlock_t f_lock; - enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -2214,14 +2213,6 @@ static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, !gid_valid(i_gid_into_mnt(mnt_userns, inode)); } -static inline enum rw_hint file_write_hint(struct file *file) -{ - if (file->f_write_hint != WRITE_LIFE_NOT_SET) - return file->f_write_hint; - - return file_inode(file)->i_write_hint; -} - static inline int iocb_flags(struct file *file); static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -- cgit v1.2.3 From 4e5cc99e1e485954a9c09872e0eeea570fb2b5a5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 8 Mar 2022 15:32:19 +0800 Subject: blk-mq: manage hctx map via xarray First code becomes more clean by switching to xarray from plain array. Second use-after-free on q->queue_hw_ctx can be fixed because queue_for_each_hw_ctx() may be run when updating nr_hw_queues is in-progress. With this patch, q->hctx_table is defined as xarray, and this structure will share same lifetime with request queue, so queue_for_each_hw_ctx() can use q->hctx_table to lookup hctx reliably. Reported-by: Yu Kuai Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220308073219.91173-7-ming.lei@redhat.com [axboe: fix blk_mq_hw_ctx forward declaration] Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3a41d50b85d3..7aa5c54901a9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -917,8 +917,7 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) } #define queue_for_each_hw_ctx(q, hctx, i) \ - for ((i) = 0; (i) < (q)->nr_hw_queues && \ - ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) + xa_for_each(&(q)->hctx_table, (i), (hctx)) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e19947d84f12..d982171469bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,7 +355,7 @@ struct request_queue { unsigned int queue_depth; /* hw dispatch queues */ - struct blk_mq_hw_ctx **queue_hw_ctx; + struct xarray hctx_table; unsigned int nr_hw_queues; /* -- cgit v1.2.3 From e7f76552277ca79d9dfe4c59f7b3620a98467f9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Mar 2022 06:51:49 +0100 Subject: scsi: don't use disk->private_data to find the scsi_driver Requiring every ULP to have the scsi_drive as first member of the private data is rather fragile and not necessary anyway. Just use the driver hanging off the SCSI device instead. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220308055200.735835-4-hch@lst.de Signed-off-by: Jens Axboe --- include/scsi/scsi_cmnd.h | 9 --------- include/scsi/scsi_driver.h | 9 +++++++-- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 6794d7322cbd..e3a4c67794b1 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -13,7 +13,6 @@ #include struct Scsi_Host; -struct scsi_driver; /* * MAX_COMMAND_SIZE is: @@ -159,14 +158,6 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd) return cmd + 1; } -/* make sure not to use it with passthrough commands */ -static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) -{ - struct request *rq = scsi_cmd_to_rq(cmd); - - return *(struct scsi_driver **)rq->q->disk->private_data; -} - void scsi_done(struct scsi_cmnd *cmd); extern void scsi_finish_command(struct scsi_cmnd *cmd); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 6dffa8555a39..4ce1988b2ba0 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -4,11 +4,10 @@ #include #include +#include struct module; struct request; -struct scsi_cmnd; -struct scsi_device; struct scsi_driver { struct device_driver gendrv; @@ -31,4 +30,10 @@ extern int scsi_register_interface(struct class_interface *); #define scsi_unregister_interface(intf) \ class_interface_unregister(intf) +/* make sure not to use it with passthrough commands */ +static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) +{ + return to_scsi_driver(cmd->device->sdev_gendev.driver); +} + #endif /* _SCSI_SCSI_DRIVER_H */ -- cgit v1.2.3 From 0eb4e7ee1655b7ffd3204a35d77b809d42613cb9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 7 Mar 2022 12:44:31 -0800 Subject: mptcp: add tracepoint in mptcp_sendmsg_frag The tracepoint in get_mapping_status() only dumped the incoming mpext fields. This patch added a new tracepoint in mptcp_sendmsg_frag() to dump the outgoing mpext too. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/trace/events/mptcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 6bf43176f14c..f8e28e686c65 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -115,6 +115,10 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->csum_reqd) ); +DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag, + TP_PROTO(struct mptcp_ext *mpext), + TP_ARGS(mpext)); + DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, TP_PROTO(struct mptcp_ext *mpext), TP_ARGS(mpext)); -- cgit v1.2.3 From d045b9eb95a9b611c483897a69e7285aefdc66d7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 7 Mar 2022 12:44:36 -0800 Subject: mptcp: introduce implicit endpoints In some edge scenarios, an MPTCP subflows can use a local address mapped by a "implicit" endpoint created by the in-kernel path manager. Such endpoints presence can be confusing, as it's creation is hard to track and will prevent the later endpoint creation from the user-space using the same address. Define a new endpoint flag to mark implicit endpoints and allow the user-space to replace implicit them with user-provided data at endpoint creation time. 
Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index f106a3941cdf..9690efedb5fa 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -81,6 +81,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) #define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) +#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4) enum { MPTCP_PM_CMD_UNSPEC, -- cgit v1.2.3 From 7e580490ac9819dd55a36be2a9b3380d1391f91b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Mar 2022 11:15:15 +0200 Subject: net: dsa: felix: avoid early deletion of host FDB entries The Felix driver declares FDB isolation but puts all standalone ports in VID 0. This is mostly problem-free as discussed with Alvin here: https://patchwork.kernel.org/project/netdevbpf/cover/20220302191417.1288145-1-vladimir.oltean@nxp.com/#24763870 however there is one catch. DSA still thinks that FDB entries are installed on the CPU port as many times as there are user ports, and this is problematic when multiple user ports share the same MAC address. Consider the default case where all user ports inherit their MAC address from the DSA master, and then the user runs: ip link set swp0 address 00:01:02:03:04:05 The above will make dsa_slave_set_mac_address() call dsa_port_standalone_host_fdb_add() for 00:01:02:03:04:05 in port 0's standalone database, and dsa_port_standalone_host_fdb_del() for the old address of swp0, again in swp0's standalone database. Both the ->port_fdb_add() and ->port_fdb_del() will be propagated down to the felix driver, which will end up deleting the old MAC address from the CPU port. But this is still in use by other user ports, so we end up breaking unicast termination for them. There isn't a problem in the fact that DSA keeps track of host standalone addresses in the individual database of each user port: some drivers like sja1105 need this. There also isn't a problem in the fact that some drivers choose the same VID/FID for all standalone ports. It is just that the deletion of these host addresses must be delayed until they are known to not be in use any longer, and only the driver has this knowledge. Since DSA keeps these addresses in &cpu_dp->fdbs and &cpu_db->mdbs, it is just a matter of walking over those lists and see whether the same MAC address is present on the CPU port in the port db of another user port. I have considered reusing the generic dsa_port_walk_fdbs() and dsa_port_walk_mdbs() schemes for this, but locking makes it difficult. In the ->port_fdb_add() method and co, &dp->addr_lists_lock is held, but dsa_port_walk_fdbs() also acquires that lock. Also, even assuming that we introduce an unlocked variant of the address iterator, we'd still need some relatively complex data structures, and a void *ctx in the dsa_fdb_walk_cb_t which we don't currently pass, such that drivers are able to figure out, after iterating, whether the same MAC address is or isn't present in the port db of another port. All the above, plus the fact that I expect other drivers to follow the same model as felix where all standalone ports use the same FID, made me conclude that a generic method provided by DSA is necessary: dsa_fdb_present_in_other_db() and the mdb equivalent. 
Felix calls this from the ->port_fdb_del() handler for the CPU port, when the database was classified to either a port db, or a LAG db. For symmetry, we also call this from ->port_fdb_add(), because if the address was installed once, then installing it a second time serves no purpose: it's already in hardware in VID 0 and it affects all standalone ports. This change moves dsa_db_equal() from switch.c to dsa.c, since it now has one more caller. Fixes: 54c319846086 ("net: mscc: ocelot: enforce FDB isolation when VLAN-unaware") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 759479fe8573..9d16505fc0e2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1227,6 +1227,12 @@ typedef int dsa_fdb_walk_cb_t(struct dsa_switch *ds, int port, int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); +bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); +bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) -- cgit v1.2.3 From 1330b6ef3313fcec577d2b020c290dc8b9f11f1a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 7 Mar 2022 16:44:21 -0800 Subject: skb: make drop reason booleanable We have a number of cases where function returns drop/no drop decision as a boolean. Now that we want to report the reason code as well we have to pass extra output arguments. We can make the reason code evaluate correctly as bool. I believe we're good to reorder the reasons as they are reported to user space as strings. Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/tcp.h | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 34f572271c0c..26538ceb4b01 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,6 +314,7 @@ struct sk_buff; * used to translate the reason to string. 
*/ enum skb_drop_reason { + SKB_NOT_DROPPED_YET = 0, SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ SKB_DROP_REASON_NO_SOCKET, /* socket not found */ SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d486d7b6112d..ee8237b58e1d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1674,10 +1674,11 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family); } -bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - enum skb_drop_reason *reason, - const void *saddr, const void *daddr, - int family, int dif, int sdif); + +enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) @@ -1688,13 +1689,13 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, { return NULL; } -static inline bool tcp_inbound_md5_hash(const struct sock *sk, - const struct sk_buff *skb, - enum skb_drop_reason *reason, - const void *saddr, const void *daddr, - int family, int dif, int sdif) + +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); { - return false; + return SKB_NOT_DROPPED_YET; } #define tcp_twsk_md5_key(twsk) NULL #endif -- cgit v1.2.3 From d8fd5a1e78db375f2246d43df7833fec07a221cd Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 15:45:18 +0000 Subject: kasan: fix a missing header include of static_keys.h The kasan-enabled.h header relies on static keys, so make sure to include the header to avoid compilation errors (with JUMP_LABEL=n). It fixes the following: ./include/linux/kasan-enabled.h:9:1: warning: data definition has no type or storage class 9 | DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); | ^~~~~~~~~~~~~~~~~~~~~~~~ error: type defaults to 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int] Fixes: f9b5e46f4097eb29 ("kasan: split kasan_*enabled() functions into a separate header") Cc: Peter Collingbourne Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Joey Gouly Link: https://lore.kernel.org/r/20220301154518.19456-1-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/linux/kasan-enabled.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 4b6615375022..6f612d69ea0c 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KASAN_ENABLED_H #define _LINUX_KASAN_ENABLED_H +#include + #ifdef CONFIG_KASAN_HW_TAGS DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); -- cgit v1.2.3 From 7a976552a4f264f777be236c0c83263975512f1a Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 8 Mar 2022 08:43:37 -0800 Subject: ASoC: SOF: make struct snd_sof_widget IPC agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse the UUID token and save it in the new uuid field in struct snd_sof_widget. struct sof_ipc_comp_ext is no longer needed. So remove it too. 
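For illustration only (not from the patch): with the UUID stored on the generic widget, an IPC-agnostic lookup can match widgets without touching any IPC-specific structure. The assumption here is that the new field is a guid_t named 'uuid', populated from the topology token:

#include <linux/uuid.h>

static bool foo_widget_uuid_matches(struct snd_sof_widget *swidget,
				    const guid_t *comp_uuid)
{
	return guid_equal(&swidget->uuid, comp_uuid);
}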
Signed-off-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220308164344.577647-12-ranjani.sridharan@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/topology.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/sound/sof/topology.h b/include/sound/sof/topology.h index adee6afd1490..88560281d420 100644 --- a/include/sound/sof/topology.h +++ b/include/sound/sof/topology.h @@ -87,9 +87,6 @@ struct sof_ipc_comp { */ #define SOF_BUF_UNDERRUN_PERMITTED BIT(1) -/* the UUID size in bytes, shared between FW and host */ -#define SOF_UUID_SIZE 16 - /* create new component buffer - SOF_IPC_TPLG_BUFFER_NEW */ struct sof_ipc_buffer { struct sof_ipc_comp comp; @@ -303,9 +300,4 @@ enum sof_event_types { SOF_KEYWORD_DETECT_DAPM_EVENT, }; -/* extended data struct for UUID components */ -struct sof_ipc_comp_ext { - uint8_t uuid[SOF_UUID_SIZE]; -} __packed; - #endif -- cgit v1.2.3 From 839e484f9e173309d599e1281eb7221e07f41814 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 8 Mar 2022 08:43:43 -0800 Subject: ASoC: SOF: make struct snd_sof_dai IPC agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the comp_dai and dai_config members of struct snd_sof_dai and replace it with a void *private field. Introduce a new struct sof_dai_private_data that will contain the pointer to these two fields. The topology parser will populate this structure and save it as part of the "private" member in snd_sof_dai. Change all users of these fields to use the private member instead. Signed-off-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220308164344.577647-18-ranjani.sridharan@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/dai.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 59ee50ac7705..a818a0f0a226 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -116,4 +116,9 @@ struct sof_ipc_dai_config { }; } __packed; +struct sof_dai_private_data { + struct sof_ipc_comp_dai *comp_dai; + struct sof_ipc_dai_config *dai_config; +}; + #endif -- cgit v1.2.3 From 1174442b82b6cf13328a5f9574fac3655c58ae06 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Mar 2022 13:25:51 -0600 Subject: ASoC: soc-acpi: fix kernel-doc descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing dmic_num mention and clarify that 'links' mean 'SoundWire links', not to be used for other links. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220308192610.392950-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index fdb536d699ff..a8fd62c00f91 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -60,9 +60,10 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @acpi_ipc_irq_index: used for BYT-CR detection * @platform: string used for HDAudio codec support * @codec_mask: used for HDAudio support + * @dmic_num: number of SoC- or chipset-attached PDM digital microphones * @common_hdmi_codec_drv: use commom HDAudio HDMI codec driver - * @link_mask: links enabled on the board - * @links: array of link _ADR descriptors, null terminated + * @link_mask: SoundWire links enabled on the board + * @links: array of SoundWire link _ADR descriptors, null terminated * @num_dai_drivers: number of elements in @dai_drivers * @dai_drivers: pointer to dai_drivers, used e.g. in nocodec mode */ -- cgit v1.2.3 From 679aa83a0fb70dcbf9e97cbdfd573e6fc8bf9b1a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Mar 2022 13:25:52 -0600 Subject: ASoC: soc-acpi: add information on I2S/TDM link mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The platform driver may have information on which I2S/TDM link(s) to enable in the machine driver. In the case of Intel devices, this may be extracted from NHLT tables in platform firmware. This link information is necessary to make sure machine driver and topology are aligned. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220308192610.392950-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index a8fd62c00f91..093bbe7f0e1f 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -64,6 +64,7 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @common_hdmi_codec_drv: use commom HDAudio HDMI codec driver * @link_mask: SoundWire links enabled on the board * @links: array of SoundWire link _ADR descriptors, null terminated + * @i2s_link_mask: I2S/TDM links enabled on the board * @num_dai_drivers: number of elements in @dai_drivers * @dai_drivers: pointer to dai_drivers, used e.g. in nocodec mode */ @@ -75,6 +76,7 @@ struct snd_soc_acpi_mach_params { bool common_hdmi_codec_drv; u32 link_mask; const struct snd_soc_acpi_link_adr *links; + u32 i2s_link_mask; u32 num_dai_drivers; struct snd_soc_dai_driver *dai_drivers; }; -- cgit v1.2.3 From 0c470db0399e17310ed2ba54dd1c25cfa16ce0d3 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Mar 2022 13:25:54 -0600 Subject: ALSA: intel-nhlt: add helper to detect SSP link mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NHLT information can be used to figure out which SSPs are enabled in a platform. 
The 'SSP' link type is too broad for machine drivers, since it can cover the Bluetooth sideband and the analog audio codec connections, so this helper exposes a parameter to filter with the device type (DEVICE_I2S refers to analog audio codec in NHLT parlance). The helper returns a mask, since more than one SSP may be used for analog audio, e.g. the NHLT spec describes the use of SSP0 for amplifiers and SSP1 for headset codec. Note that if more than one bit is set, it's impossible to determine which SSP is connected to what external component. Additional platform-specific information based on e.g. DMI quirks would still be required in the machine driver to configure the relevant dailinks. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Acked-by: Takashi Iwai Link: https://lore.kernel.org/r/20220308192610.392950-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/intel-nhlt.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 089a760d36eb..6fb2d5e378fd 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -18,6 +18,13 @@ enum nhlt_link_type { NHLT_LINK_INVALID }; +enum nhlt_device_type { + NHLT_DEVICE_BT = 0, + NHLT_DEVICE_DMIC = 1, + NHLT_DEVICE_I2S = 4, + NHLT_DEVICE_INVALID +}; + #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT) struct wav_fmt { @@ -41,13 +48,6 @@ struct wav_fmt_ext { u8 sub_fmt[16]; } __packed; -enum nhlt_device_type { - NHLT_DEVICE_BT = 0, - NHLT_DEVICE_DMIC = 1, - NHLT_DEVICE_I2S = 4, - NHLT_DEVICE_INVALID -}; - struct nhlt_specific_cfg { u32 size; u8 caps[]; @@ -133,6 +133,9 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr); int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt); bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type); + +int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type); + struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, @@ -163,6 +166,11 @@ static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, return false; } +static inline int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type) +{ + return 0; +} + static inline struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, -- cgit v1.2.3 From 4694b8382d6b79bcf95995757419d279a3ab375b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Mar 2022 13:25:56 -0600 Subject: ASoC: Intel: soc-acpi: quirk topology filename dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Different topology filenames may be required depending on which SSP is used, and whether or not digital mics are present. This patch adds a tplg_quirk_mask and in the case of the SOF driver adds the relevant configurations. This is a short-term solution to the ES8336 support issues. In a long-term solution, we would need an interface where the machine driver or platform driver have the ability to alter the topology hard-coded low-level hardware support, e.g. by substituting an interface for another, or disabling an interface that is not supported on a given skew. 
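As an illustration of how a machine table entry might opt into the dynamic naming (the ACPI id, driver name and topology base name below are made up, modeled on the ES8336 case mentioned above):

static struct snd_soc_acpi_mach foo_es8336_machines[] = {
	{
		.id = "ESSX8336",
		.drv_name = "sof-essx8336",
		/* SSP number and DMIC count suffixes appended at run time */
		.sof_tplg_filename = "sof-foo-es8336",
		.tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER |
				   SND_SOC_ACPI_TPLG_INTEL_SSP_MSB |
				   SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER,
	},
	{},
};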
BugLink: https://github.com/thesofproject/linux/issues/3248 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220308192610.392950-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 093bbe7f0e1f..d33cf8df14b1 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -125,6 +125,24 @@ struct snd_soc_acpi_link_adr { const struct snd_soc_acpi_adr_device *adr_d; }; +/* + * when set the topology uses the -ssp suffix, where N is determined based on + * BIOS or DMI information + */ +#define SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER BIT(0) + +/* + * when more than one SSP is reported in the link mask, use the most significant. + * This choice was found to be valid on platforms with ES8336 codecs. + */ +#define SND_SOC_ACPI_TPLG_INTEL_SSP_MSB BIT(1) + +/* + * when set the topology uses the -dmicch suffix, where N is determined based on + * BIOS or DMI information + */ +#define SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER BIT(2) + /** * snd_soc_acpi_mach: ACPI-based machine descriptor. Most of the fields are * related to the hardware, except for the firmware and topology file names. @@ -146,6 +164,7 @@ struct snd_soc_acpi_link_adr { * @pdata: intended for platform data or machine specific-ops. This structure * is not constant since this field may be updated at run-time * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled + * @tplg_quirk_mask: quirks to select different topology files dynamically */ /* Descriptor for SST ASoC machine driver */ struct snd_soc_acpi_mach { @@ -161,6 +180,7 @@ struct snd_soc_acpi_mach { void *pdata; struct snd_soc_acpi_mach_params mach_params; const char *sof_tplg_filename; + const u32 tplg_quirk_mask; }; #define SND_SOC_ACPI_MAX_CODECS 3 -- cgit v1.2.3 From b77e70f6b8f2cc62fba847f3008a430a09ef275d Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Wed, 9 Mar 2022 16:01:42 +0800 Subject: regulator: Add bindings for Richtek RT5190A PMIC Add bindings for Richtek RT5190A PMIC. Signed-off-by: ChiYuan Huang Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1646812903-32496-2-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- include/dt-bindings/regulator/richtek,rt5190a-regulator.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/dt-bindings/regulator/richtek,rt5190a-regulator.h (limited to 'include') diff --git a/include/dt-bindings/regulator/richtek,rt5190a-regulator.h b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h new file mode 100644 index 000000000000..63f99d4c1cb3 --- /dev/null +++ b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__ +#define __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__ + +/* + * BUCK/LDO mode constants which may be used in devicetree properties + * (eg. regulator-allowed-modes). + * See the manufacturer's datasheet for more information on these modes. 
+ */ + +#define RT5190A_OPMODE_AUTO 0 +#define RT5190A_OPMODE_FPWM 1 + +#endif -- cgit v1.2.3 From f804360bb3a50decbed6e2761247964dca72c080 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 26 Feb 2022 22:41:25 +0100 Subject: clk: qcom: smd: Add missing RPM clocks for msm8992/4 XO and MSS_CFG were omitted when first adding the clocks for these SoCs. Add them, and while at it, move the XO clock to the top of the definition list, as ideally everyone should start using it sooner or later.. Fixes: b4297844995f ("clk: qcom: smd: Add support for MSM8992/4 rpm clocks") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220226214126.21209-2-konrad.dybcio@somainline.org --- include/linux/soc/qcom/smd-rpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 860dd8cdf9f3..82c9d489833a 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -40,6 +40,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_AGGR_CLK 0x72676761 #define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 #define QCOM_SMD_RPM_PKA_CLK 0x616b70 +#define QCOM_SMD_RPM_MCFG_CLK 0x6766636d int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, -- cgit v1.2.3 From 2dc63e768ce2fbf24cb49c858f549596bb30a0a0 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Thu, 3 Mar 2022 02:00:41 +0530 Subject: clk: qcom: gcc: Add PCIe0 and PCIe1 GDSC for SM8150 Add the PCIe0 and PCIe1 GDSC defines & driver structures for SM8150. Cc: Stephen Boyd Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220302203045.184500-4-bhupesh.sharma@linaro.org --- include/dt-bindings/clock/qcom,gcc-sm8150.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sm8150.h b/include/dt-bindings/clock/qcom,gcc-sm8150.h index 3e1a91876610..ae9c16410420 100644 --- a/include/dt-bindings/clock/qcom,gcc-sm8150.h +++ b/include/dt-bindings/clock/qcom,gcc-sm8150.h @@ -241,6 +241,8 @@ #define GCC_USB_PHY_CFG_AHB2PHY_BCR 28 /* GCC GDSCRs */ +#define PCIE_0_GDSC 0 +#define PCIE_1_GDSC 1 #define USB30_PRIM_GDSC 4 #define USB30_SEC_GDSC 5 -- cgit v1.2.3 From 2fb605a1a529d366513fd223b4ee7c75db2db7e8 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Thu, 3 Mar 2022 13:51:39 +0530 Subject: clk: qcom: gcc: Add UFS_CARD and UFS_PHY GDSCs for SM8150 Add the UFS_CARD and UFS_PHY GDSC defines & driver structures for SM8150. Cc: Stephen Boyd Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220303082140.240745-2-bhupesh.sharma@linaro.org --- include/dt-bindings/clock/qcom,gcc-sm8150.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sm8150.h b/include/dt-bindings/clock/qcom,gcc-sm8150.h index ae9c16410420..d70ab54c44c7 100644 --- a/include/dt-bindings/clock/qcom,gcc-sm8150.h +++ b/include/dt-bindings/clock/qcom,gcc-sm8150.h @@ -243,6 +243,8 @@ /* GCC GDSCRs */ #define PCIE_0_GDSC 0 #define PCIE_1_GDSC 1 +#define UFS_CARD_GDSC 2 +#define UFS_PHY_GDSC 3 #define USB30_PRIM_GDSC 4 #define USB30_SEC_GDSC 5 -- cgit v1.2.3 From d1a16e345578332c31aa4fb7b76e2e38249861ad Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Thu, 3 Mar 2022 14:18:21 +0530 Subject: clk: qcom: gcc: Add emac GDSC support for SM8150 Add the EMAC GDSC defines and driver structures for SM8150. 
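For illustration only: the matching driver-side structure lives in drivers/clk/qcom/gcc-sm8150.c, outside this include-only view, and typically looks like the sketch below. The GDSCR register offset is a placeholder, and the EMAC_GDSC index added in this header is what the driver's gdsc table and device tree power-domain consumers use to refer to it.

static struct gdsc emac_gdsc = {
	.gdscr = 0x0,	/* placeholder: real offset comes from the GCC register map */
	.pd = {
		.name = "emac_gdsc",
	},
	.pwrsts = PWRSTS_OFF_ON,
};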
Cc: Stephen Boyd Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220303084824.284946-4-bhupesh.sharma@linaro.org --- include/dt-bindings/clock/qcom,gcc-sm8150.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sm8150.h b/include/dt-bindings/clock/qcom,gcc-sm8150.h index d70ab54c44c7..dfefd5e8bf6e 100644 --- a/include/dt-bindings/clock/qcom,gcc-sm8150.h +++ b/include/dt-bindings/clock/qcom,gcc-sm8150.h @@ -247,5 +247,6 @@ #define UFS_PHY_GDSC 3 #define USB30_PRIM_GDSC 4 #define USB30_SEC_GDSC 5 +#define EMAC_GDSC 6 #endif -- cgit v1.2.3 From 8397c9c0c26b5129acb623044ecc2d8d1b15d420 Mon Sep 17 00:00:00 2001 From: Martin Botka Date: Thu, 3 Mar 2022 14:18:11 +0100 Subject: dt-bindings: clock: add QCOM SM6125 display clock bindings Add device tree bindings for display clock controller for Qualcomm Technology Inc's SM6125 SoC. Signed-off-by: Martin Botka Signed-off-by: Marijn Suijten Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220303131812.302302-3-marijn.suijten@somainline.org --- include/dt-bindings/clock/qcom,dispcc-sm6125.h | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,dispcc-sm6125.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,dispcc-sm6125.h b/include/dt-bindings/clock/qcom,dispcc-sm6125.h new file mode 100644 index 000000000000..4ff974f4fcc3 --- /dev/null +++ b/include/dt-bindings/clock/qcom,dispcc-sm6125.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_SM6125_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_SM6125_H + +#define DISP_CC_PLL0 0 +#define DISP_CC_MDSS_AHB_CLK 1 +#define DISP_CC_MDSS_AHB_CLK_SRC 2 +#define DISP_CC_MDSS_BYTE0_CLK 3 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 5 +#define DISP_CC_MDSS_DP_AUX_CLK 6 +#define DISP_CC_MDSS_DP_AUX_CLK_SRC 7 +#define DISP_CC_MDSS_DP_CRYPTO_CLK 8 +#define DISP_CC_MDSS_DP_CRYPTO_CLK_SRC 9 +#define DISP_CC_MDSS_DP_LINK_CLK 10 +#define DISP_CC_MDSS_DP_LINK_CLK_SRC 11 +#define DISP_CC_MDSS_DP_LINK_INTF_CLK 12 +#define DISP_CC_MDSS_DP_PIXEL_CLK 13 +#define DISP_CC_MDSS_DP_PIXEL_CLK_SRC 14 +#define DISP_CC_MDSS_ESC0_CLK 15 +#define DISP_CC_MDSS_ESC0_CLK_SRC 16 +#define DISP_CC_MDSS_MDP_CLK 17 +#define DISP_CC_MDSS_MDP_CLK_SRC 18 +#define DISP_CC_MDSS_MDP_LUT_CLK 19 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 20 +#define DISP_CC_MDSS_PCLK0_CLK 21 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 22 +#define DISP_CC_MDSS_ROT_CLK 23 +#define DISP_CC_MDSS_ROT_CLK_SRC 24 +#define DISP_CC_MDSS_VSYNC_CLK 25 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 26 +#define DISP_CC_XO_CLK 27 + +/* DISP_CC GDSCR */ +#define MDSS_GDSC 0 + +#endif -- cgit v1.2.3 From 24055bb87977e0c687b54ebf7bac8715f3636bc3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 9 Mar 2022 14:20:12 +0200 Subject: net: tcp: fix shim definition of tcp_inbound_md5_hash When CONFIG_TCP_MD5SIG isn't enabled, there is a compilation bug due to the fact that the static inline definition of tcp_inbound_md5_hash() has an unexpected semicolon. Remove it. 
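With the stray semicolon the stub became a declaration immediately followed by an unattached function body, which does not compile; the intended shape is the usual no-op inline for the !CONFIG_TCP_MD5SIG case (sketch of the resulting code):

  static inline enum skb_drop_reason
  tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
                       const void *saddr, const void *daddr,
                       int family, int dif, int sdif)
  {
          return SKB_NOT_DROPPED_YET;
  }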
Fixes: 1330b6ef3313 ("skb: make drop reason booleanable") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220309122012.668986-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ee8237b58e1d..70ca4a5e330a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1693,7 +1693,7 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif); + int family, int dif, int sdif) { return SKB_NOT_DROPPED_YET; } -- cgit v1.2.3 From 69fe0f29892077f14b56e2a479b6bcf533209d53 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 4 Mar 2022 21:08:03 -0500 Subject: block: add ->poll_bio to block_device_operations Prepare for supporting IO polling for bio-based driver. Add ->poll_bio callback so that bio-based driver can provide their own logic for polling bio. Also fix ->submit_bio_bio typo in comment block above __submit_bio_noacct. Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f..51f1b1ddbed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1455,6 +1455,8 @@ enum blk_unique_id { struct block_device_operations { void (*submit_bio)(struct bio *bio); + int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, + unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); -- cgit v1.2.3 From cf220ad6749b8305ba11bdf601c55a17ad2a715d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 9 Mar 2022 13:12:00 +0000 Subject: arm64/mte: Remove asymmetric mode from the prctl() interface As pointed out by Evgenii Stepanov one potential issue with the new ABI for enabling asymmetric is that if there are multiple places where MTE is configured in a process, some of which were compiled with the old prctl.h and some of which were compiled with the new prctl.h, there may be problems keeping track of which MTE modes are requested. For example some code may disable only sync and async modes leaving asymmetric mode enabled when it intended to fully disable MTE. In order to avoid such mishaps remove asymmetric mode from the prctl(), instead implicitly allowing it if both sync and async modes are requested. This should not disrupt userspace since a process requesting both may already see a mix of sync and async modes due to differing defaults between CPUs or changes in default while the process is running but it does mean that userspace is unable to explicitly request asymmetric mode without changing the system default for CPUs. 
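A minimal userspace sketch of the resulting ABI, assuming a process that wants MTE enabled with a non-zero tag set (error handling omitted):

  #include <linux/prctl.h>
  #include <sys/prctl.h>

  static int mte_enable_sync_and_async(void)
  {
          /* requesting both modes lets the kernel pick asymmetric mode where suitable */
          unsigned long ctrl = PR_TAGGED_ADDR_ENABLE |
                               PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
                               (0xfffeUL << PR_MTE_TAG_SHIFT);

          return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
  }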
Reported-by: Evgenii Stepanov Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Evgenii Stepanov Cc: Peter Collingbourne Cc: Joey Gouly Cc: Branislav Rankov Link: https://lore.kernel.org/r/20220309131200.112637-1-broonie@kernel.org Signed-off-by: Will Deacon --- include/uapi/linux/prctl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 4ae2b21e4066..e998764f0262 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,9 +238,7 @@ struct prctl_mm_map { # define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) -# define PR_MTE_TCF_ASYMM (1UL << 19) -# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \ - PR_MTE_TCF_ASYMM) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) -- cgit v1.2.3 From ac77998b7ac3044f0509b097da9637184598980d Mon Sep 17 00:00:00 2001 From: Mohammad Kabat Date: Thu, 25 Mar 2021 14:38:55 +0200 Subject: net/mlx5: Fix size field in bufferx_reg struct According to HW spec the field "size" should be 16 bits in bufferx register. Fixes: e281682bf294 ("net/mlx5_core: HW data structs/types definitions cleanup") Signed-off-by: Mohammad Kabat Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..5743f5b3414b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9900,8 +9900,8 @@ struct mlx5_ifc_bufferx_reg_bits { u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_at_8[0xc]; - u8 size[0xc]; + u8 reserved_at_8[0x8]; + u8 size[0x10]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; -- cgit v1.2.3 From 99a2b9be077ae3a5d97fbf5f7782e0f2e9812978 Mon Sep 17 00:00:00 2001 From: Ben Ben-Ishay Date: Wed, 2 Mar 2022 17:07:08 +0200 Subject: net/mlx5e: SHAMPO, reduce TIR indication SHAMPO is an RQ / WQ feature, an indication was added to the TIR in the first place to enforce suitability between connected TIR and RQ, this enforcement does not exist in current the Firmware implementation and was redundant in the first place. Fixes: 83439f3c37aa ("net/mlx5e: Add HW-GRO offload") Signed-off-by: Ben Ben-Ishay Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5743f5b3414b..49a48d7709ac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3434,7 +3434,6 @@ enum { enum { MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), - MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2), }; enum { -- cgit v1.2.3 From 34f46ae0d4b38e83cfb26fb6f06b5b5efea47fdc Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 27 Jan 2022 15:22:21 +0200 Subject: net/mlx5: Add command failures data to debugfs Add new counters to command interface debugfs to count command failures. The following counters added: total_failed - number of times command failed (any kind of failure). failed_mbox_status - number of times command failed on bad status returned by FW. 
In addition, add data about last command failure to command interface debugfs: last_failed_errno - last command failed returned errno. last_failed_mbox_status - last bad status returned by FW. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d3b1a6a1f8d2..f18c1e15a12c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -264,6 +264,14 @@ enum { struct mlx5_cmd_stats { u64 sum; u64 n; + /* number of times command failed */ + u64 failed; + /* number of times command failed on bad status returned by FW */ + u64 failed_mbox_status; + /* last command failed returned errno */ + u32 last_failed_errno; + /* last bad status returned by FW */ + u8 last_failed_mbox_status; struct dentry *root; /* protect command average calculations */ spinlock_t lock; @@ -955,6 +963,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; + u16 opcode; /* cmd opcode */ void *out; /* pointer to the cmd output buffer */ }; -- cgit v1.2.3 From d2cb8dda214fb75f946ba554a1ecd25da7004b2b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 26 Jan 2022 07:23:53 +0200 Subject: net/mlx5: Change release_all_pages cap bit location mlx5 FW has changed release_all_pages cap bit by one bit offset to reflect a fix in the FW flow for release_all_pages. The driver should use the new bit to ensure it calls release_all_pages only if the FW fix is there. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ea65131835ab..69985e4d8dfe 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1419,8 +1419,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_130[0xa]; u8 log_max_ra_res_dc[0x6]; - u8 reserved_at_140[0x6]; + u8 reserved_at_140[0x5]; u8 release_all_pages[0x1]; + u8 must_not_use[0x1]; u8 reserved_at_147[0x2]; u8 roce_accl[0x1]; u8 log_max_ra_req_qp[0x6]; -- cgit v1.2.3 From 66771a1c729e816dc84e29ba555013b6e722fb22 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 18 Feb 2022 09:36:20 +0200 Subject: net/mlx5: Move debugfs entries to separate struct Move the debugfs entry pointers under priv to their own struct. Add get function for device debugfs root. 
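As an assumed usage sketch (not part of this patch), a sub-component would now look up the per-device root through the getter instead of reaching into priv; the file name and fops below are hypothetical:

  struct dentry *root = mlx5_debugfs_get_dev_root(dev);

  debugfs_create_file("my_counters", 0400, root, dev, &my_counters_fops);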
Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f18c1e15a12c..aa539d245a12 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -551,6 +551,14 @@ struct mlx5_adev { int idx; }; +struct mlx5_debugfs_entries { + struct dentry *dbg_root; + struct dentry *qp_debugfs; + struct dentry *eq_debugfs; + struct dentry *cq_debugfs; + struct dentry *cmdif_debugfs; +}; + struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ @@ -570,12 +578,7 @@ struct mlx5_priv { struct mlx5_core_health health; struct list_head traps; - /* start: qp staff */ - struct dentry *qp_debugfs; - struct dentry *eq_debugfs; - struct dentry *cq_debugfs; - struct dentry *cmdif_debugfs; - /* end: qp staff */ + struct mlx5_debugfs_entries dbg; /* start: alloc staff */ /* protect buffer allocation according to numa node */ @@ -585,7 +588,6 @@ struct mlx5_priv { struct mutex pgdir_mutex; struct list_head pgdir_list; /* end: alloc staff */ - struct dentry *dbg_root; struct list_head ctx_list; spinlock_t ctx_lock; @@ -1038,6 +1040,7 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, -- cgit v1.2.3 From 4e05cbf05c66ca6931ee4f2a5aff0d84f1e65f40 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 27 Jan 2022 07:03:33 +0200 Subject: net/mlx5: Add pages debugfs Add pages debugfs to expose the following counters for debuggability: fw_pages_total - How many pages were given to FW and not returned yet. vfs_pages - For SRIOV, how many pages were given to FW for virtual functions usage. host_pf_pages - For ECPF, how many pages were given to FW for external hosts physical functions usage. 
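A rough sketch of how these counters can be exposed now that they are plain u32 fields; the file names follow the list above, while the exact directory layout is an assumption:

  void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
  {
          struct dentry *pages;

          pages = debugfs_create_dir("pages", dev->priv.dbg.dbg_root);
          dev->priv.dbg.pages_debugfs = pages;

          debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
          debugfs_create_u32("vfs_pages", 0400, pages, &dev->priv.vfs_pages);
          debugfs_create_u32("host_pf_pages", 0400, pages, &dev->priv.host_pf_pages);
  }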
Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa539d245a12..c5f93b5910ed 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -557,6 +557,7 @@ struct mlx5_debugfs_entries { struct dentry *eq_debugfs; struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; + struct dentry *pages_debugfs; }; struct mlx5_ft_pool; @@ -569,11 +570,11 @@ struct mlx5_priv { struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct xarray page_root_xa; - int fw_pages; + u32 fw_pages; atomic_t reg_pages; struct list_head free_list; - int vfs_pages; - int host_pf_pages; + u32 vfs_pages; + u32 host_pf_pages; struct mlx5_core_health health; struct list_head traps; @@ -1026,6 +1027,8 @@ int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); -- cgit v1.2.3 From 32071187e9fb18da62f5be569bd2ea0d7a981ee8 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 27 Jan 2022 07:51:14 +0200 Subject: net/mlx5: Add debugfs counters for page commands failures Add the following new debugfs counters for debug and verbosity: fw_pages_alloc_failed - number of pages FW requested but driver failed to allocate. give_pages_dropped - number of pages given to FW, but command give pages failed by FW. reclaim_pages_discard - number of pages which were about to reclaim back and FW failed the command. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c5f93b5910ed..00a914b0716e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,6 +575,9 @@ struct mlx5_priv { struct list_head free_list; u32 vfs_pages; u32 host_pf_pages; + u32 fw_pages_alloc_failed; + u32 give_pages_dropped; + u32 reclaim_pages_discard; struct mlx5_core_health health; struct list_head traps; -- cgit v1.2.3 From 5c422bfad2fbab96381273e50c7084465199501d Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 24 Feb 2022 00:58:58 +0200 Subject: net/mlx5: DR, Add support for matching on Internet Header Length (IHL) Add support for matching on new field - Internet Header Length (IHL). 
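An assumed usage sketch, not taken from this patch: a rule that should only hit IPv4 packets without options can now mask and match the field via MLX5_SET() on the criteria and value buffers:

  /* mask the full 4-bit field in the match criteria ... */
  MLX5_SET(fte_match_set_lyr_2_4, match_criteria, ipv4_ihl, 0xf);
  /* ... and match IHL == 5, i.e. a 20 byte IPv4 header with no options */
  MLX5_SET(fte_match_set_lyr_2_4, match_value, ipv4_ihl, 5);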
Signed-off-by: Muhammad Sammar Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69985e4d8dfe..1f0c35162b7b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -493,7 +493,10 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_at_c0[0x18]; + u8 reserved_at_c0[0x10]; + u8 ipv4_ihl[0x4]; + u8 reserved_at_c4[0x4]; + u8 ttl_hoplimit[0x8]; u8 udp_sport[0x10]; -- cgit v1.2.3 From 6862c787c7e88df490675ed781dc9052dba88a56 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 23 Feb 2022 18:40:59 +0200 Subject: net/mlx5: DR, Add support for ConnectX-7 steering Add support for a new SW format version that is implemented by ConnectX-7. Except for several differences, the STEv2 is identical to STEv1, so for most callbacks the STEv2 context struct will call STEv1 functions. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1f0c35162b7b..bcf60ede6fcc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1346,6 +1346,7 @@ enum mlx5_fc_bulk_alloc_bitmask { enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, + MLX5_STEERING_FORMAT_CONNECTX_7 = 2, }; struct mlx5_ifc_cmd_hca_cap_bits { -- cgit v1.2.3 From b530e9e1063ed2b817eae7eec6ed2daa8be11608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Mar 2022 11:53:42 +0100 Subject: bpf: Add "live packet" mode for XDP in BPF_PROG_RUN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for running XDP programs through BPF_PROG_RUN in a mode that enables live packet processing of the resulting frames. Previous uses of BPF_PROG_RUN for XDP returned the XDP program return code and the modified packet data to userspace, which is useful for unit testing of XDP programs. The existing BPF_PROG_RUN for XDP allows userspace to set the ingress ifindex and RXQ number as part of the context object being passed to the kernel. This patch reuses that code, but adds a new mode with different semantics, which can be selected with the new BPF_F_TEST_XDP_LIVE_FRAMES flag. When running BPF_PROG_RUN in this mode, the XDP program return codes will be honoured: returning XDP_PASS will result in the frame being injected into the networking stack as if it came from the selected networking interface, while returning XDP_TX and XDP_REDIRECT will result in the frame being transmitted out that interface. XDP_TX is translated into an XDP_REDIRECT operation to the same interface, since the real XDP_TX action is only possible from within the network drivers themselves, not from the process context where BPF_PROG_RUN is executed. Internally, this new mode of operation creates a page pool instance while setting up the test run, and feeds pages from that into the XDP program. The setup cost of this is amortised over the number of repetitions specified by userspace. 
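A sketch of the userspace side, assuming current libbpf option names (the flag itself is the one added to the UAPI below):

  #include <linux/bpf.h>
  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>

  static int run_xdp_live(int prog_fd, void *pkt, __u32 len)
  {
          DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
                  .data_in      = pkt,
                  .data_size_in = len,
                  .repeat       = 1 << 20,
                  .flags        = BPF_F_TEST_XDP_LIVE_FRAMES,
          );

          return bpf_prog_test_run_opts(prog_fd, &opts);
  }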
To support the performance testing use case, we further optimise the setup step so that all pages in the pool are pre-initialised with the packet data, and pre-computed context and xdp_frame objects stored at the start of each page. This makes it possible to entirely avoid touching the page content on each XDP program invocation, and enables sending up to 9 Mpps/core on my test box. Because the data pages are recycled by the page pool, and the test runner doesn't re-initialise them for each run, subsequent invocations of the XDP program will see the packet data in the state it was after the last time it ran on that particular page. This means that an XDP program that modifies the packet before redirecting it has to be careful about which assumptions it makes about the packet content, but that is only an issue for the most naively written programs. Enabling the new flag is only allowed when not setting ctx_out and data_out in the test specification, since using it means frames will be redirected somewhere else, so they can't be returned. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220309105346.100053-2-toke@redhat.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4eebea830613..bc23020b638d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1232,6 +1232,8 @@ enum { /* If set, run the test on the cpu specified by bpf_attr.test.cpu */ #define BPF_F_TEST_RUN_ON_CPU (1U << 0) +/* If set, XDP frames will be transmitted after processing */ +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -1393,6 +1395,7 @@ union bpf_attr { __aligned_u64 ctx_out; __u32 flags; __u32 cpu; + __u32 batch_size; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ -- cgit v1.2.3 From 9a61d0838cd0a81529badfba7bfa39e81d5529d3 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 25 Feb 2022 20:00:21 +0530 Subject: drivers/nvdimm: Add nvdimm pmu structure A structure is added called nvdimm_pmu, for performance stats reporting support of nvdimm devices. It can be used to add device pmu data such as pmu data structure for performance stats, nvdimm device pointer along with cpumask attributes. Acked-by: Peter Zijlstra (Intel) Tested-by: Nageswara R Sastry Signed-off-by: Kajol Jain Reviewed-by: Madhavan Srinivasan Link: https://lore.kernel.org/r/20220225143024.47947-2-kjain@linux.ibm.com Signed-off-by: Dan Williams --- include/linux/nd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index 8a8c63edb1b2..ad186e828263 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -8,6 +8,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -23,6 +24,25 @@ enum nvdimm_claim_class { NVDIMM_CCLASS_UNKNOWN, }; +/** + * struct nvdimm_pmu - data structure for nvdimm perf driver + * @pmu: pmu data structure for nvdimm performance stats. + * @dev: nvdimm device pointer. + * @cpu: designated cpu for counter access. + * @node: node for cpu hotplug notifier link. + * @cpuhp_state: state for cpu hotplug notification. + * @arch_cpumask: cpumask to get designated cpu for counter access. 
+ */ +struct nvdimm_pmu { + struct pmu pmu; + struct device *dev; + int cpu; + struct hlist_node node; + enum cpuhp_state cpuhp_state; + /* cpumask provided by arch/platform specific code */ + struct cpumask arch_cpumask; +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; -- cgit v1.2.3 From 0fab1ba6ad6ba1f76380f92ead95c6e861ef8116 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 25 Feb 2022 20:00:22 +0530 Subject: drivers/nvdimm: Add perf interface to expose nvdimm performance stats A common interface is added to get performance stats reporting support for nvdimm devices. Added interface defines supported event list, config fields for the event attributes and their corresponding bit values which are exported via sysfs. Interface also added support for pmu register/unregister functions, cpu hotplug feature along with macros for handling events addition via sysfs. It adds attribute groups for format, cpumask and events to the pmu structure. User could use the standard perf tool to access perf events exposed via nvdimm pmu. [Declare pmu functions in nd.h file to resolve implicit-function-declaration warning and make hotplug function static as reported by kernel test robot] Acked-by: Peter Zijlstra (Intel) Tested-by: Nageswara R Sastry Signed-off-by: Kajol Jain Link: https://lore.kernel.org/all/202202241242.zqzGkguy-lkp@intel.com/ Reported-by: kernel test robot Reviewed-by: Madhavan Srinivasan Link: https://lore.kernel.org/r/20220225143024.47947-3-kjain@linux.ibm.com Signed-off-by: Dan Williams --- include/linux/nd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index ad186e828263..4813c7089e5c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -9,6 +9,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -24,6 +25,19 @@ enum nvdimm_claim_class { NVDIMM_CCLASS_UNKNOWN, }; +#define NVDIMM_EVENT_VAR(_id) event_attr_##_id +#define NVDIMM_EVENT_PTR(_id) (&event_attr_##_id.attr.attr) + +#define NVDIMM_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, NVDIMM_EVENT_VAR(_id), _id, \ + nvdimm_events_sysfs_show) + +/* Event attribute array index */ +#define NVDIMM_PMU_FORMAT_ATTR 0 +#define NVDIMM_PMU_EVENT_ATTR 1 +#define NVDIMM_PMU_CPUMASK_ATTR 2 +#define NVDIMM_PMU_NULL_ATTR 3 + /** * struct nvdimm_pmu - data structure for nvdimm perf driver * @pmu: pmu data structure for nvdimm performance stats. @@ -43,6 +57,16 @@ struct nvdimm_pmu { struct cpumask arch_cpumask; }; +extern ssize_t nvdimm_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); + +int register_nvdimm_pmu(struct nvdimm_pmu *nvdimm, struct platform_device *pdev); +void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu); +void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); +int perf_pmu_register(struct pmu *pmu, const char *name, int type); +void perf_pmu_unregister(struct pmu *pmu); + struct nd_device_driver { struct device_driver drv; unsigned long type; -- cgit v1.2.3 From 013a3e7c79ac51e6cdd84e3580863a562c7597c7 Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 8 Mar 2022 09:10:51 -0500 Subject: ptp: idt82p33: use rsmu driver to access i2c/spi bus rsmu (Renesas Synchronization Management Unit ) driver is located in drivers/mfd and responsible for creating multiple devices including idt82p33 phc, which will then use the exposed regmap and mutex handle to access i2c/spi bus. 
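As a small sketch of the intent: REG_ADDR() folds the page into the bus offset, so REG_ADDR(0x2, 0x34) evaluates to 0x134 and matches DPLL1_TOD_CNFG above, and a read through the regmap handed over by the rsmu parent device (assumed to be available as 'regmap' here) becomes:

  u8 cfg;
  int err = regmap_bulk_read(regmap, DPLL1_TOD_CNFG, &cfg, sizeof(cfg));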
Signed-off-by: Min Li Acked-by: Richard Cochran Link: https://lore.kernel.org/r/1646748651-16811-1-git-send-email-min.li.xe@renesas.com Signed-off-by: Jakub Kicinski --- include/linux/mfd/idt82p33_reg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/idt82p33_reg.h b/include/linux/mfd/idt82p33_reg.h index 129a6c078221..1db532feeb91 100644 --- a/include/linux/mfd/idt82p33_reg.h +++ b/include/linux/mfd/idt82p33_reg.h @@ -7,6 +7,8 @@ #ifndef HAVE_IDT82P33_REG #define HAVE_IDT82P33_REG +#define REG_ADDR(page, offset) (((page) << 0x7) | ((offset) & 0x7f)) + /* Register address */ #define DPLL1_TOD_CNFG 0x134 #define DPLL2_TOD_CNFG 0x1B4 @@ -41,6 +43,7 @@ #define REG_SOFT_RESET 0X381 #define OUT_MUX_CNFG(outn) REG_ADDR(0x6, (0xC * (outn))) +#define TOD_TRIGGER(wr_trig, rd_trig) ((wr_trig & 0xf) << 4 | (rd_trig & 0xf)) /* Register bit definitions */ #define SYNC_TOD BIT(1) -- cgit v1.2.3 From 77f09e66f613e4801134c64d26a0be593588a42e Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Tue, 8 Mar 2022 19:40:31 -0800 Subject: net/tls: Provide {__,}tls_driver_ctx() unconditionally Having the definitions of {__,}tls_driver_ctx() under an #if guard means code referencing them also needs to rely on the preprocessor. The protection doesn't appear needed so make the definitions unconditional. Fixes: db37bc177dae ("net/funeth: add the data path") Reported-by: Randy Dunlap Reported-by: kernel test robot Suggested-by: Jakub Kicinski Signed-off-by: Dimitris Michailidis Signed-off-by: Jakub Kicinski --- include/net/tls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 526cb2c3b724..b6968a5b5538 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -626,7 +626,6 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx) return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } -#if IS_ENABLED(CONFIG_TLS_DEVICE) static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, enum tls_offload_ctx_dir direction) { @@ -641,7 +640,6 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) { return __tls_driver_ctx(tls_get_ctx(sk), direction); } -#endif #define RESYNC_REQ BIT(0) #define RESYNC_REQ_ASYNC BIT(1) -- cgit v1.2.3 From 65466904b015f6eeb9225b51aeb29b01a1d4b59c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Mar 2022 17:57:57 -0800 Subject: tcp: adjust TSO packet sizes based on min_rtt Back when tcp_tso_autosize() and TCP pacing were introduced, our focus was really to reduce burst sizes for long distance flows. The simple heuristic of using sk_pacing_rate/1024 has worked well, but can lead to too small packets for hosts in the same rack/cluster, when thousands of flows compete for the bottleneck. Neal Cardwell had the idea of making the TSO burst size a function of both sk_pacing_rate and tcp_min_rtt() Indeed, for local flows, sending bigger bursts is better to reduce cpu costs, as occasional losses can be repaired quite fast. This patch is based on Neal Cardwell implementation done more than two years ago. bbr is adjusting max_pacing_rate based on measured bandwidth, while cubic would over estimate max_pacing_rate. /proc/sys/net/ipv4/tcp_tso_rtt_log can be used to tune or disable this new feature, in logarithmic steps. Tested: 100Gbit NIC, two hosts in the same rack, 4K MTU. 600 flows rate-limited to 20000000 bytes per second. 
Before patch: (TSO sizes would be limited to 20000000/1024/4096 -> 4 segments per TSO) ~# echo 0 >/proc/sys/net/ipv4/tcp_tso_rtt_log ~# nstat -n;perf stat ./super_netperf 600 -H otrv6 -l 20 -- -K dctcp -q 20000000;nstat|egrep "TcpInSegs|TcpOutSegs|TcpRetransSegs|Delivered" 96005 Performance counter stats for './super_netperf 600 -H otrv6 -l 20 -- -K dctcp -q 20000000': 65,945.29 msec task-clock # 2.845 CPUs utilized 1,314,632 context-switches # 19935.279 M/sec 5,292 cpu-migrations # 80.249 M/sec 940,641 page-faults # 14264.023 M/sec 201,117,030,926 cycles # 3049769.216 GHz (83.45%) 17,699,435,405 stalled-cycles-frontend # 8.80% frontend cycles idle (83.48%) 136,584,015,071 stalled-cycles-backend # 67.91% backend cycles idle (83.44%) 53,809,530,436 instructions # 0.27 insn per cycle # 2.54 stalled cycles per insn (83.36%) 9,062,315,523 branches # 137422329.563 M/sec (83.22%) 153,008,621 branch-misses # 1.69% of all branches (83.32%) 23.182970846 seconds time elapsed TcpInSegs 15648792 0.0 TcpOutSegs 58659110 0.0 # Average of 3.7 4K segments per TSO packet TcpExtTCPDelivered 58654791 0.0 TcpExtTCPDeliveredCE 19 0.0 After patch: ~# echo 9 >/proc/sys/net/ipv4/tcp_tso_rtt_log ~# nstat -n;perf stat ./super_netperf 600 -H otrv6 -l 20 -- -K dctcp -q 20000000;nstat|egrep "TcpInSegs|TcpOutSegs|TcpRetransSegs|Delivered" 96046 Performance counter stats for './super_netperf 600 -H otrv6 -l 20 -- -K dctcp -q 20000000': 48,982.58 msec task-clock # 2.104 CPUs utilized 186,014 context-switches # 3797.599 M/sec 3,109 cpu-migrations # 63.472 M/sec 941,180 page-faults # 19214.814 M/sec 153,459,763,868 cycles # 3132982.807 GHz (83.56%) 12,069,861,356 stalled-cycles-frontend # 7.87% frontend cycles idle (83.32%) 120,485,917,953 stalled-cycles-backend # 78.51% backend cycles idle (83.24%) 36,803,672,106 instructions # 0.24 insn per cycle # 3.27 stalled cycles per insn (83.18%) 5,947,266,275 branches # 121417383.427 M/sec (83.64%) 87,984,616 branch-misses # 1.48% of all branches (83.43%) 23.281200256 seconds time elapsed TcpInSegs 1434706 0.0 TcpOutSegs 58883378 0.0 # Average of 41 4K segments per TSO packet TcpExtTCPDelivered 58878971 0.0 TcpExtTCPDeliveredCE 9664 0.0 Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://lore.kernel.org/r/20220309015757.2532973-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0687867b5cd..ce0cc4e8d8c7 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -127,6 +127,7 @@ struct netns_ipv4 { u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; + u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; @@ -160,9 +161,9 @@ struct netns_ipv4 { int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; + u8 sysctl_tcp_tso_rtt_log; u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; - u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; -- cgit v1.2.3 From 8ddde07a3d285a0f3cec14924446608320fdc013 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 8 Mar 2022 16:59:10 +0800 Subject: dma-mapping: benchmark: extract a common header file for map_benchmark definition kernel/dma/map_benchmark.c and selftests/dma/dma_map_benchmark.c have duplicate map_benchmark definitions, which tends to lead to 
inconsistent changes to map_benchmark on both sides, extract a common header file to avoid this problem. Signed-off-by: Tian Tao Acked-by: Barry Song Reviewed-by: Shuah Khan Signed-off-by: Christoph Hellwig --- include/linux/map_benchmark.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/map_benchmark.h (limited to 'include') diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h new file mode 100644 index 000000000000..62674c83bde4 --- /dev/null +++ b/include/linux/map_benchmark.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 HiSilicon Limited. + */ + +#ifndef _KERNEL_DMA_BENCHMARK_H +#define _KERNEL_DMA_BENCHMARK_H + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ +}; +#endif /* _KERNEL_DMA_BENCHMARK_H */ -- cgit v1.2.3 From 530e0d46c61314c59ecfdb8d3bcb87edbc0f85d3 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 9 Mar 2022 13:04:13 +0100 Subject: can: isotp: set default value for N_As to 50 micro seconds The N_As value describes the time a CAN frame needs on the wire when transmitted by the CAN controller. Even very short CAN FD frames need arround 100 usecs (bitrate 1Mbit/s, data bitrate 8Mbit/s). Having N_As to be zero (the former default) leads to 'no CAN frame separation' when STmin is set to zero by the receiving node. This 'burst mode' should not be enabled by default as it could potentially dump a high number of CAN frames into the netdev queue from the soft hrtimer context. This does not affect the system stability but is just not nice and cooperative. With this N_As/frame_txtime value the 'burst mode' is disabled by default. As user space applications usually do not set the frame_txtime element of struct can_isotp_options the new in-kernel default is very likely overwritten with zero when the sockopt() CAN_ISOTP_OPTS is invoked. To make sure that a N_As value of zero is only set intentional the value '0' is now interpreted as 'do not change the current value'. When a frame_txtime of zero is required for testing purposes this CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. 
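A minimal userspace sketch of that last point (socket creation and bind omitted; the option has to be set before bind(2) on the isotp socket):

  struct can_isotp_options opts = {
          .frame_txtime = CAN_ISOTP_FRAME_TXTIME_ZERO,    /* really request N_As = 0 */
  };

  setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));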
Link: https://lore.kernel.org/all/20220309120416.83514-2-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index c55935b64ccc..590f8aea2b6d 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,20 +137,16 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ -/* default values */ +/* protocol machine default values */ #define CAN_ISOTP_DEFAULT_FLAGS 0 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ -#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */ #define CAN_ISOTP_DEFAULT_RECV_BS 0 #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 -#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU -#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN -#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 - /* * Remark on CAN_ISOTP_DEFAULT_RECV_* values: * @@ -162,4 +158,24 @@ struct can_isotp_ll_options { * consistency and copied directly into the flow control (FC) frame. */ +/* link layer default values => make use of Classical CAN frames */ + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as + * it only makes sense for isotp implementation tests to run without + * a N_As value. As user space applications usually do not set the + * frame_txtime element of struct can_isotp_options the new in-kernel + * default is very likely overwritten with zero when the sockopt() + * CAN_ISOTP_OPTS is invoked. + * To make sure that a N_As value of zero is only set intentional the + * value '0' is now interpreted as 'do not change the current value'. + * When a frame_txtime of zero is required for testing purposes this + * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. + */ +#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF + #endif /* !_UAPI_CAN_ISOTP_H */ -- cgit v1.2.3 From 2757be22c0f4c06d359f802fa1fc57286fa26975 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 4 Feb 2022 14:51:13 +0000 Subject: io_uring: remove trace for eventfd The information on whether eventfd is registered is not very useful and would result in the tracepoint being enclosed in an rcu_readlock in a later patch that tries to avoid ring quiesce for registering eventfd. 
Suggested-by: Jens Axboe Signed-off-by: Usama Arif Link: https://lore.kernel.org/r/20220204145117.1186568-2-usama.arif@bytedance.com Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 7346f0164cf4..098beda7601a 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -57,10 +57,9 @@ TRACE_EVENT(io_uring_create, * @opcode: describes which operation to perform * @nr_user_files: number of registered files * @nr_user_bufs: number of registered buffers - * @cq_ev_fd: whether eventfs registered or not * @ret: return code * - * Allows to trace fixed files/buffers/eventfds, that could be registered to + * Allows to trace fixed files/buffers, that could be registered to * avoid an overhead of getting references to them for every operation. This * event, together with io_uring_file_get, can provide a full picture of how * much overhead one can reduce via fixing. @@ -68,16 +67,15 @@ TRACE_EVENT(io_uring_create, TRACE_EVENT(io_uring_register, TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files, - unsigned nr_bufs, bool eventfd, long ret), + unsigned nr_bufs, long ret), - TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret), + TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret), TP_STRUCT__entry ( __field( void *, ctx ) __field( unsigned, opcode ) __field( unsigned, nr_files ) __field( unsigned, nr_bufs ) - __field( bool, eventfd ) __field( long, ret ) ), @@ -86,14 +84,13 @@ TRACE_EVENT(io_uring_register, __entry->opcode = opcode; __entry->nr_files = nr_files; __entry->nr_bufs = nr_bufs; - __entry->eventfd = eventfd; __entry->ret = ret; ), TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, " - "eventfd %d, ret %ld", + "ret %ld", __entry->ctx, __entry->opcode, __entry->nr_files, - __entry->nr_bufs, __entry->eventfd, __entry->ret) + __entry->nr_bufs, __entry->ret) ); /** -- cgit v1.2.3 From 502c87d65564cbfd65b1621309bcd900390eca81 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Mon, 14 Feb 2022 10:04:30 -0800 Subject: io-uring: Make tracepoints consistent. This makes the io-uring tracepoints consistent. Where it makes sense the tracepoints start with the following four fields: - context (ring) - request - user_data - opcode. 
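For illustration, a call site then takes the shape below (sketch, not quoted from the patch):

  trace_io_uring_defer(ctx, req, req->user_data, req->opcode);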
Signed-off-by: Stefan Roesch Link: https://lore.kernel.org/r/20220214180430.70572-3-shr@fb.com Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 318 +++++++++++++++++++--------------------- 1 file changed, 153 insertions(+), 165 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 098beda7601a..18d4341c581c 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -29,15 +29,15 @@ TRACE_EVENT(io_uring_create, TP_ARGS(fd, ctx, sq_entries, cq_entries, flags), TP_STRUCT__entry ( - __field( int, fd ) - __field( void *, ctx ) + __field( int, fd ) + __field( void *, ctx ) __field( u32, sq_entries ) __field( u32, cq_entries ) __field( u32, flags ) ), TP_fast_assign( - __entry->fd = fd; + __entry->fd = fd; __entry->ctx = ctx; __entry->sq_entries = sq_entries; __entry->cq_entries = cq_entries; @@ -72,11 +72,11 @@ TRACE_EVENT(io_uring_register, TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( unsigned, opcode ) - __field( unsigned, nr_files ) - __field( unsigned, nr_bufs ) - __field( long, ret ) + __field( void *, ctx ) + __field( unsigned, opcode ) + __field( unsigned, nr_files) + __field( unsigned, nr_bufs ) + __field( long, ret ) ), TP_fast_assign( @@ -97,6 +97,8 @@ TRACE_EVENT(io_uring_register, * io_uring_file_get - called before getting references to an SQE file * * @ctx: pointer to a ring context structure + * @req: pointer to a submitted request + * @user_data: user data associated with the request * @fd: SQE file descriptor * * Allows to trace out how often an SQE file reference is obtained, which can @@ -105,59 +107,71 @@ TRACE_EVENT(io_uring_register, */ TRACE_EVENT(io_uring_file_get, - TP_PROTO(void *ctx, int fd), + TP_PROTO(void *ctx, void *req, unsigned long long user_data, int fd), - TP_ARGS(ctx, fd), + TP_ARGS(ctx, req, user_data, fd), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( int, fd ) + __field( void *, ctx ) + __field( void *, req ) + __field( u64, user_data ) + __field( int, fd ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = ctx; + __entry->req = req; + __entry->user_data = user_data; __entry->fd = fd; ), - TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd) + TP_printk("ring %p, req %p, user_data %llu, fd %d", + __entry->ctx, __entry->req, __entry->user_data, __entry->fd) ); /** * io_uring_queue_async_work - called before submitting a new async work * * @ctx: pointer to a ring context structure - * @hashed: type of workqueue, hashed or normal * @req: pointer to a submitted request + * @user_data: user data associated with the request + * @opcode: opcode of request + * @flags request flags * @work: pointer to a submitted io_wq_work + * @rw: type of workqueue, hashed or normal * * Allows to trace asynchronous work submission. 
*/ TRACE_EVENT(io_uring_queue_async_work, - TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work, - unsigned int flags), + TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode, + unsigned int flags, struct io_wq_work *work, int rw), - TP_ARGS(ctx, rw, req, work, flags), + TP_ARGS(ctx, req, user_data, flags, opcode, work, rw), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( int, rw ) - __field( void *, req ) - __field( struct io_wq_work *, work ) - __field( unsigned int, flags ) + __field( void *, ctx ) + __field( void *, req ) + __field( u64, user_data ) + __field( u8, opcode ) + __field( unsigned int, flags ) + __field( struct io_wq_work *, work ) + __field( int, rw ) ), TP_fast_assign( - __entry->ctx = ctx; - __entry->rw = rw; - __entry->req = req; - __entry->work = work; - __entry->flags = flags; + __entry->ctx = ctx; + __entry->req = req; + __entry->user_data = user_data; + __entry->flags = flags; + __entry->opcode = opcode; + __entry->work = work; + __entry->rw = rw; ), - TP_printk("ring %p, request %p, flags %d, %s queue, work %p", - __entry->ctx, __entry->req, __entry->flags, - __entry->rw ? "hashed" : "normal", __entry->work) + TP_printk("ring %p, request %p, user_data %llu, opcode %d, flags %d, %s queue, work %p", + __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); /** @@ -166,30 +180,33 @@ TRACE_EVENT(io_uring_queue_async_work, * @ctx: pointer to a ring context structure * @req: pointer to a deferred request * @user_data: user data associated with the request + * @opcode: opcode of request * * Allows to track deferred requests, to get an insight about what requests are * not started immediately. */ TRACE_EVENT(io_uring_defer, - TP_PROTO(void *ctx, void *req, unsigned long long user_data), + TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode), - TP_ARGS(ctx, req, user_data), + TP_ARGS(ctx, req, user_data, opcode), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( unsigned long long, data ) + __field( void *, ctx ) + __field( void *, req ) + __field( unsigned long long, data ) + __field( u8, opcode ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; __entry->data = user_data; + __entry->opcode = opcode; ), - TP_printk("ring %p, request %p user_data %llu", __entry->ctx, - __entry->req, __entry->data) + TP_printk("ring %p, request %p, user_data %llu, opcode %d", + __entry->ctx, __entry->req, __entry->data, __entry->opcode) ); /** @@ -247,7 +264,7 @@ TRACE_EVENT(io_uring_cqring_wait, ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = ctx; __entry->min_events = min_events; ), @@ -257,7 +274,10 @@ TRACE_EVENT(io_uring_cqring_wait, /** * io_uring_fail_link - called before failing a linked request * + * @ctx: pointer to a ring context structure * @req: request, which links were cancelled + * @user_data: user data associated with the request + * @opcode: opcode of request * @link: cancelled link * * Allows to track linked requests cancellation, to see not only that some work @@ -265,27 +285,36 @@ TRACE_EVENT(io_uring_cqring_wait, */ TRACE_EVENT(io_uring_fail_link, - TP_PROTO(void *req, void *link), + TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, void *link), - TP_ARGS(req, link), + TP_ARGS(ctx, req, user_data, opcode, link), TP_STRUCT__entry ( - __field( void *, req ) - __field( void *, link ) + __field( void *, ctx ) + __field( void *, req ) + __field( unsigned long long, 
user_data ) + __field( u8, opcode ) + __field( void *, link ) ), TP_fast_assign( - __entry->req = req; - __entry->link = link; + __entry->ctx = ctx; + __entry->req = req; + __entry->user_data = user_data; + __entry->opcode = opcode; + __entry->link = link; ), - TP_printk("request %p, link %p", __entry->req, __entry->link) + TP_printk("ring %p, request %p, user_data %llu, opcode %d, link %p", + __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + __entry->link) ); /** * io_uring_complete - called when completing an SQE * * @ctx: pointer to a ring context structure + * @req: pointer to a submitted request * @user_data: user data associated with the request * @res: result of the request * @cflags: completion flags @@ -293,12 +322,13 @@ TRACE_EVENT(io_uring_fail_link, */ TRACE_EVENT(io_uring_complete, - TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags), + TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags), - TP_ARGS(ctx, user_data, res, cflags), + TP_ARGS(ctx, req, user_data, res, cflags), TP_STRUCT__entry ( __field( void *, ctx ) + __field( void *, req ) __field( u64, user_data ) __field( int, res ) __field( unsigned, cflags ) @@ -306,14 +336,16 @@ TRACE_EVENT(io_uring_complete, TP_fast_assign( __entry->ctx = ctx; + __entry->req = req; __entry->user_data = user_data; __entry->res = res; __entry->cflags = cflags; ), - TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x", - __entry->ctx, (unsigned long long)__entry->user_data, - __entry->res, __entry->cflags) + TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags %x", + __entry->ctx, __entry->req, + (unsigned long long)__entry->user_data, + __entry->res, __entry->cflags) ); /** @@ -321,8 +353,8 @@ TRACE_EVENT(io_uring_complete, * * @ctx: pointer to a ring context structure * @req: pointer to a submitted request - * @opcode: opcode of request * @user_data: user data associated with the request + * @opcode: opcode of request * @flags request flags * @force_nonblock: whether a context blocking or not * @sq_thread: true if sq_thread has submitted this SQE @@ -332,34 +364,34 @@ TRACE_EVENT(io_uring_complete, */ TRACE_EVENT(io_uring_submit_sqe, - TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags, + TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, u32 flags, bool force_nonblock, bool sq_thread), - TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread), + TP_ARGS(ctx, req, user_data, opcode, flags, force_nonblock, sq_thread), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( u8, opcode ) - __field( u64, user_data ) - __field( u32, flags ) - __field( bool, force_nonblock ) - __field( bool, sq_thread ) + __field( void *, ctx ) + __field( void *, req ) + __field( unsigned long long, user_data ) + __field( u8, opcode ) + __field( u32, flags ) + __field( bool, force_nonblock ) + __field( bool, sq_thread ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; - __entry->opcode = opcode; __entry->user_data = user_data; + __entry->opcode = opcode; __entry->flags = flags; __entry->force_nonblock = force_nonblock; __entry->sq_thread = sq_thread; ), - TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, " + TP_printk("ring %p, req %p, user_data %llu, opcode %d, flags %u, " "non block %d, sq_thread %d", __entry->ctx, __entry->req, - __entry->opcode, (unsigned long long)__entry->user_data, + __entry->user_data, __entry->opcode, __entry->flags, __entry->force_nonblock, __entry->sq_thread) ); @@ -368,8 +400,8 
@@ TRACE_EVENT(io_uring_submit_sqe, * * @ctx: pointer to a ring context structure * @req: pointer to the armed request - * @opcode: opcode of request * @user_data: user data associated with the request + * @opcode: opcode of request * @mask: request poll events mask * @events: registered events of interest * @@ -378,155 +410,110 @@ TRACE_EVENT(io_uring_submit_sqe, */ TRACE_EVENT(io_uring_poll_arm, - TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, + TP_PROTO(void *ctx, void *req, u64 user_data, u8 opcode, int mask, int events), - TP_ARGS(ctx, req, opcode, user_data, mask, events), + TP_ARGS(ctx, req, user_data, opcode, mask, events), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( u8, opcode ) - __field( u64, user_data ) - __field( int, mask ) - __field( int, events ) + __field( void *, ctx ) + __field( void *, req ) + __field( unsigned long long, user_data ) + __field( u8, opcode ) + __field( int, mask ) + __field( int, events ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; - __entry->opcode = opcode; __entry->user_data = user_data; + __entry->opcode = opcode; __entry->mask = mask; __entry->events = events; ), - TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x", - __entry->ctx, __entry->req, __entry->opcode, - (unsigned long long) __entry->user_data, + TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask 0x%x, events 0x%x", + __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->mask, __entry->events) ); -TRACE_EVENT(io_uring_poll_wake, - - TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask), - - TP_ARGS(ctx, opcode, user_data, mask), - - TP_STRUCT__entry ( - __field( void *, ctx ) - __field( u8, opcode ) - __field( u64, user_data ) - __field( int, mask ) - ), - - TP_fast_assign( - __entry->ctx = ctx; - __entry->opcode = opcode; - __entry->user_data = user_data; - __entry->mask = mask; - ), - - TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x", - __entry->ctx, __entry->opcode, - (unsigned long long) __entry->user_data, - __entry->mask) -); - -TRACE_EVENT(io_uring_task_add, - - TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask), - - TP_ARGS(ctx, opcode, user_data, mask), - - TP_STRUCT__entry ( - __field( void *, ctx ) - __field( u8, opcode ) - __field( u64, user_data ) - __field( int, mask ) - ), - - TP_fast_assign( - __entry->ctx = ctx; - __entry->opcode = opcode; - __entry->user_data = user_data; - __entry->mask = mask; - ), - - TP_printk("ring %p, op %d, data 0x%llx, mask %x", - __entry->ctx, __entry->opcode, - (unsigned long long) __entry->user_data, - __entry->mask) -); - /* - * io_uring_task_run - called when task_work_run() executes the poll events - * notification callbacks + * io_uring_task_add - called after adding a task * * @ctx: pointer to a ring context structure - * @req: pointer to the armed request - * @opcode: opcode of request + * @req: pointer to request * @user_data: user data associated with the request + * @opcode: opcode of request + * @mask: request poll events mask * - * Allows to track when notified poll events are processed */ -TRACE_EVENT(io_uring_task_run, +TRACE_EVENT(io_uring_task_add, - TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data), + TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, int mask), - TP_ARGS(ctx, req, opcode, user_data), + TP_ARGS(ctx, req, user_data, opcode, mask), TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( u8, opcode ) - __field( u64, user_data ) + __field( void 
*, ctx ) + __field( void *, req ) + __field( unsigned long long, user_data ) + __field( u8, opcode ) + __field( int, mask ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; - __entry->opcode = opcode; __entry->user_data = user_data; + __entry->opcode = opcode; + __entry->mask = mask; ), - TP_printk("ring %p, req %p, op %d, data 0x%llx", - __entry->ctx, __entry->req, __entry->opcode, - (unsigned long long) __entry->user_data) + TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask %x", + __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + __entry->mask) ); /* * io_uring_req_failed - called when an sqe is errored dring submission * * @sqe: pointer to the io_uring_sqe that failed + * @ctx: pointer to a ring context structure + * @req: pointer to request * @error: error it failed with * * Allows easier diagnosing of malformed requests in production systems. */ TRACE_EVENT(io_uring_req_failed, - TP_PROTO(const struct io_uring_sqe *sqe, int error), + TP_PROTO(const struct io_uring_sqe *sqe, void *ctx, void *req, int error), - TP_ARGS(sqe, error), + TP_ARGS(sqe, ctx, req, error), TP_STRUCT__entry ( - __field( u8, opcode ) - __field( u8, flags ) - __field( u8, ioprio ) - __field( u64, off ) - __field( u64, addr ) - __field( u32, len ) - __field( u32, op_flags ) - __field( u64, user_data ) - __field( u16, buf_index ) - __field( u16, personality ) - __field( u32, file_index ) - __field( u64, pad1 ) - __field( u64, pad2 ) - __field( int, error ) + __field( void *, ctx ) + __field( void *, req ) + __field( unsigned long long, user_data ) + __field( u8, opcode ) + __field( u8, flags ) + __field( u8, ioprio ) + __field( u64, off ) + __field( u64, addr ) + __field( u32, len ) + __field( u32, op_flags ) + __field( u16, buf_index ) + __field( u16, personality ) + __field( u32, file_index ) + __field( u64, pad1 ) + __field( u64, pad2 ) + __field( int, error ) ), TP_fast_assign( + __entry->ctx = ctx; + __entry->req = req; + __entry->user_data = sqe->user_data; __entry->opcode = sqe->opcode; __entry->flags = sqe->flags; __entry->ioprio = sqe->ioprio; @@ -534,7 +521,6 @@ TRACE_EVENT(io_uring_req_failed, __entry->addr = sqe->addr; __entry->len = sqe->len; __entry->op_flags = sqe->rw_flags; - __entry->user_data = sqe->user_data; __entry->buf_index = sqe->buf_index; __entry->personality = sqe->personality; __entry->file_index = sqe->file_index; @@ -543,13 +529,15 @@ TRACE_EVENT(io_uring_req_failed, __entry->error = error; ), - TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, " - "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, " + TP_printk("ring %p, req %p, user_data %llu, " + "op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, " + "len=%u, rw_flags=0x%x, buf_index=%d, " "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", + __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->flags, __entry->ioprio, (unsigned long long)__entry->off, (unsigned long long) __entry->addr, __entry->len, - __entry->op_flags, (unsigned long long) __entry->user_data, + __entry->op_flags, __entry->buf_index, __entry->personality, __entry->file_index, (unsigned long long) __entry->pad1, (unsigned long long) __entry->pad2, __entry->error) -- cgit v1.2.3 From e7a6c00dc77aedf27a601738ea509f1caea6d673 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Mar 2022 08:22:22 -0700 Subject: io_uring: add support for registering ring file descriptors Lots of workloads use multiple threads, in which case the file table is shared between them. 
This makes getting and putting the ring file descriptor for each io_uring_enter(2) system call more expensive, as it involves an atomic get and put for each call. Similarly to how we allow registering normal file descriptors to avoid this overhead, add support for an io_uring_register(2) API that allows to register the ring fds themselves: 1) IORING_REGISTER_RING_FDS - takes an array of io_uring_rsrc_update structs, and registers them with the task. 2) IORING_UNREGISTER_RING_FDS - takes an array of io_uring_src_update structs, and unregisters them. When a ring fd is registered, it is internally represented by an offset. This offset is returned to the application, and the application then uses this offset and sets IORING_ENTER_REGISTERED_RING for the io_uring_enter(2) system call. This works just like using a registered file descriptor, rather than a real one, in an SQE, where IOSQE_FIXED_FILE gets set to tell io_uring that we're using an internal offset/descriptor rather than a real file descriptor. In initial testing, this provides a nice bump in performance for threaded applications in real world cases where the batch count (eg number of requests submitted per io_uring_enter(2) invocation) is low. In a microbenchmark, submitting NOP requests, we see the following increases in performance: Requests per syscall Baseline Registered Increase ---------------------------------------------------------------- 1 ~7030K ~8080K +15% 2 ~13120K ~14800K +13% 4 ~22740K ~25300K +11% Co-developed-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 5 ++++- include/uapi/linux/io_uring.h | 13 +++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 649a4d7c241b..1814e698d861 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -9,11 +9,14 @@ struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); +void io_uring_unreg_ringfd(void); static inline void io_uring_files_cancel(void) { - if (current->io_uring) + if (current->io_uring) { + io_uring_unreg_ringfd(); __io_uring_cancel(false); + } } static inline void io_uring_task_cancel(void) { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 787f491f0d2a..42b2fe84dbcd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -257,10 +257,11 @@ struct io_cqring_offsets { /* * io_uring_enter(2) flags */ -#define IORING_ENTER_GETEVENTS (1U << 0) -#define IORING_ENTER_SQ_WAKEUP (1U << 1) -#define IORING_ENTER_SQ_WAIT (1U << 2) -#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_REGISTERED_RING (1U << 4) /* * Passed in for io_uring_setup(2). 
Copied back with updated info on success @@ -325,6 +326,10 @@ enum { /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* register/unregister io_uring fd with the ring */ + IORING_REGISTER_RING_FDS = 20, + IORING_UNREGISTER_RING_FDS = 21, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 4f57f06ce2186c31c3da52386125dc57b1cd6f96 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 06:27:26 -0700 Subject: io_uring: add support for IORING_OP_MSG_RING command This adds support for IORING_OP_MSG_RING, which allows an SQE to signal another ring. That allows either waking up someone waiting on the ring, or even passing a 64-bit value via the user_data field in the CQE. sqe->fd must contain the fd of a ring that should receive the CQE. sqe->off will be propagated to the cqe->user_data on the target ring, and sqe->len will be propagated to cqe->res. The results CQE will have IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated from a messaging request rather than a SQE issued locally on that ring. This effectively allows passing a 64-bit and a 32-bit quantify between the two rings. This request type has the following request specific error cases: - -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is of the io_uring type. - -EOVERFLOW. Set if we were not able to deliver a request to the target ring. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 42b2fe84dbcd..8bd4bfdd9a89 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -143,6 +143,7 @@ enum { IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, + IORING_OP_MSG_RING, /* this goes last, obviously */ IORING_OP_LAST, @@ -199,9 +200,11 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries + * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) +#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 382627824afb09cd86192f4b649fca8c5bfe5f40 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Thu, 10 Mar 2022 22:07:00 +0800 Subject: mm: slab: Delete unused SLAB_DEACTIVATED flag Since commit 9855609bde03 ("mm: memcg/slab: use a single set of kmem_caches for all accounted allocations") deleted all SLAB_DEACTIVATED users, therefore this flag is not needed any more, let's delete it. Signed-off-by: Xiongwei Song Acked-by: David Rientjes Reviewed-by: Roman Gushchin Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220310140701.87908-2-sxwjean@me.com --- include/linux/slab.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 37bde99b74af..b6b3eed6c7c4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -117,9 +117,6 @@ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ -/* Slab deactivation flag */ -#define SLAB_DEACTIVATED ((slab_flags_t __force)0x10000000U) - /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. 
* -- cgit v1.2.3 From afcf5441b9ff22ac57244cd45ff102ebc2e32d1a Mon Sep 17 00:00:00 2001 From: Dan Li Date: Wed, 2 Mar 2022 23:43:23 -0800 Subject: arm64: Add gcc Shadow Call Stack support Shadow call stacks will be available in GCC >= 12, this patch makes the corresponding kernel configuration available when compiling the kernel with the gcc. Note that the implementation in GCC is slightly different from Clang. With SCS enabled, functions will only pop x30 once in the epilogue, like: str x30, [x18], #8 stp x29, x30, [sp, #-16]! ...... - ldp x29, x30, [sp], #16 //clang + ldr x29, [sp], #16 //GCC ldr x30, [x18, #-8]! Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce09ab17ddd21f73ff2caf6eec3b0ee9b0e1a11e Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Reviewed-by: Nick Desaulniers Signed-off-by: Dan Li Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220303074323.86282-1-ashimida@linux.alibaba.com --- include/linux/compiler-gcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index ccbbd31b3aae..deff5b308470 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -97,6 +97,10 @@ #define KASAN_ABI_VERSION 4 #endif +#ifdef CONFIG_SHADOW_CALL_STACK +#define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#endif + #if __has_attribute(__no_sanitize_address__) #define __no_sanitize_address __attribute__((no_sanitize_address)) #else -- cgit v1.2.3 From b7f8dff09827c96032c34a945ee7757e394b5952 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 10 Mar 2022 11:45:58 -0500 Subject: dm: simplify dm_sumbit_bio_remap interface Remove the from_wq argument from dm_sumbit_bio_remap(). Eliminates the need for dm_sumbit_bio_remap() callers to know whether they are calling for a workqueue or from the original dm_submit_bio(). Add map_task to dm_io struct, record the map_task in alloc_io and clear it after all target ->map() calls have completed. Update dm_sumbit_bio_remap to check if 'current' matches io->map_task rather than rely on passed 'from_rq' argument. This change really simplifies the chore of porting each DM target to using dm_sumbit_bio_remap() because there is no longer the risk of programming error by not completely knowing all the different contexts a particular method that calls dm_sumbit_bio_remap() might be used in. Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 70cd9449275a..901ec191250c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -471,7 +471,7 @@ int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); -void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, bool from_wq); +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From a2a591fb76e6f5461dfd04715b69c317e50c43a5 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Tue, 8 Mar 2022 18:07:50 -0800 Subject: ACPI: AGDI: Add driver for Arm Generic Diagnostic Dump and Reset device ACPI for Arm Components 1.1 Platform Design Document v1.1 [0] specifices Arm Generic Diagnostic Device Interface (AGDI). 
It allows an admin to issue diagnostic dump and reset via an SDEI event or an interrupt. This patch implements SDEI path. [0] https://developer.arm.com/documentation/den0093/latest/ Signed-off-by: Ilkka Koskinen Reviewed-by: Russell King (Oracle) Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- include/linux/acpi_agdi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/acpi_agdi.h (limited to 'include') diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h new file mode 100644 index 000000000000..f477f0b452fa --- /dev/null +++ b/include/linux/acpi_agdi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_AGDI_H__ +#define __ACPI_AGDI_H__ + +#include + +#ifdef CONFIG_ACPI_AGDI +void __init acpi_agdi_init(void); +#else +static inline void acpi_agdi_init(void) {} +#endif +#endif /* __ACPI_AGDI_H__ */ -- cgit v1.2.3 From 9924fbb51e0ae30b8d7eec7c1c839d74da9678b3 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 10 Mar 2022 14:54:50 +0000 Subject: arch_topology: obtain cpu capacity using information from CPPC Define topology_init_cpu_capacity_cppc() to use highest performance values from _CPC objects to obtain and set maximum capacity information for each CPU. acpi_cppc_processor_probe() is a good point at which to trigger the initialization of CPU (u-arch) capacity values, as at this point the highest performance values can be obtained from each CPU's _CPC objects. Architectures can therefore use this functionality through arch_init_invariance_cppc(). The performance scale used by CPPC is a unified scale for all CPUs in the system. Therefore, by obtaining the raw highest performance values from the _CPC objects, and normalizing them on the [0, 1024] capacity scale, used by the task scheduler, we obtain the CPU capacity of each CPU. While an ACPI Notify(0x85) could alert about a change in the highest performance value, which should in turn retrigger the CPU capacity computations, this notification is not currently handled by the ACPI processor driver. When supported, a call to arch_init_invariance_cppc() would perform the update. Signed-off-by: Ionela Voinescu Acked-by: Sudeep Holla Tested-by: Valentin Schneider Tested-by: Yicong Yang Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index cce6136b300a..58cbe18d825c 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -11,6 +11,10 @@ void topology_normalize_cpu_scale(void); int topology_update_cpu_topology(void); +#ifdef CONFIG_ACPI_CPPC_LIB +void topology_init_cpu_capacity_cppc(void); +#endif + struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); -- cgit v1.2.3 From 19397e8b546d20226153dafe5dce34c4393752c4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:17:32 -0600 Subject: ptrace: Move ptrace_report_syscall into ptrace.h Move ptrace_report_syscall from tracehook.h into ptrace.h where it belongs. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-1-ebiederm@xmission.com Signed-off-by: "Eric W. 
Biederman" --- include/linux/ptrace.h | 27 +++++++++++++++++++++++++++ include/linux/tracehook.h | 26 -------------------------- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 8aee2945ff08..91b1074edb4c 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -413,4 +413,31 @@ static inline void user_single_step_report(struct pt_regs *regs) extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); + +/* + * ptrace report for syscall entry and exit looks identical. + */ +static inline int ptrace_report_syscall(unsigned long message) +{ + int ptrace = current->ptrace; + + if (!(ptrace & PT_PTRACED)) + return 0; + + current->ptrace_message = message; + ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } + + current->ptrace_message = 0; + return fatal_signal_pending(current); +} #endif diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 88c007ab5ebc..998bc3863559 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -51,32 +51,6 @@ #include struct linux_binprm; -/* - * ptrace report for syscall entry and exit looks identical. - */ -static inline int ptrace_report_syscall(unsigned long message) -{ - int ptrace = current->ptrace; - - if (!(ptrace & PT_PTRACED)) - return 0; - - current->ptrace_message = message; - ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - - current->ptrace_message = 0; - return fatal_signal_pending(current); -} /** * tracehook_report_syscall_entry - task is about to attempt a system call -- cgit v1.2.3 From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:46:37 -0600 Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h Rename tracehook_report_syscall_{entry,exit} to ptrace_report_syscall_{entry,exit} and place them in ptrace.h There is no longer any generic tracehook infractructure so make these ptrace specific functions ptrace specific. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com Signed-off-by: "Eric W. 
Biederman" --- include/asm-generic/syscall.h | 2 +- include/linux/entry-common.h | 6 ++--- include/linux/ptrace.h | 51 +++++++++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 51 ------------------------------------------- include/uapi/linux/ptrace.h | 2 +- 5 files changed, 56 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 81695eb02a12..5a80fe728dc8 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -44,7 +44,7 @@ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs); * * It's only valid to call this when @task is stopped for system * call exit tracing (due to %SYSCALL_WORK_SYSCALL_TRACE or - * %SYSCALL_WORK_SYSCALL_AUDIT), after tracehook_report_syscall_entry() + * %SYSCALL_WORK_SYSCALL_AUDIT), after ptrace_report_syscall_entry() * returned nonzero to prevent the system call from taking place. * * This rolls back the register state in @regs so it's as if the diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2e2b8d6140ed..a670e9fba7a9 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -3,7 +3,7 @@ #define __LINUX_ENTRYCOMMON_H #include -#include +#include #include #include #include @@ -95,7 +95,7 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #ifndef arch_syscall_enter_tracehook static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) { - return tracehook_report_syscall_entry(regs); + return ptrace_report_syscall_entry(regs); } #endif @@ -294,7 +294,7 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); #ifndef arch_syscall_exit_tracehook static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) { - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } #endif diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 91b1074edb4c..5310f43e4762 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -440,4 +440,55 @@ static inline int ptrace_report_syscall(unsigned long message) current->ptrace_message = 0; return fatal_signal_pending(current); } + +/** + * ptrace_report_syscall_entry - task is about to attempt a system call + * @regs: user register state of current task + * + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just + * entered the kernel for a system call. Full user register state is + * available here. Changing the values in @regs can affect the system + * call number and arguments to be tried. It is safe to block here, + * preventing the system call from beginning. + * + * Returns zero normally, or nonzero if the calling arch code should abort + * the system call. That must prevent normal entry so no system call is + * made. If @task ever returns to user mode after this, its register state + * is unspecified, but should be something harmless like an %ENOSYS error + * return. It should preserve enough information so that syscall_rollback() + * can work (see asm-generic/syscall.h). + * + * Called without locks, just after entering kernel mode. 
+ */ +static inline __must_check int ptrace_report_syscall_entry( + struct pt_regs *regs) +{ + return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); +} + +/** + * ptrace_report_syscall_exit - task has just finished a system call + * @regs: user register state of current task + * @step: nonzero if simulating single-step or block-step + * + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when + * the current task has just finished an attempted system call. Full + * user register state is available here. It is safe to block here, + * preventing signals from being processed. + * + * If @step is nonzero, this report is also in lieu of the normal + * trap that would follow the system call instruction because + * user_enable_block_step() or user_enable_single_step() was used. + * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. + * + * Called without locks, just before checking for pending signals. + */ +static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step) +{ + if (step) + user_single_step_report(regs); + else + ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); +} #endif diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 998bc3863559..819e82ac09bd 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,57 +52,6 @@ struct linux_binprm; -/** - * tracehook_report_syscall_entry - task is about to attempt a system call - * @regs: user register state of current task - * - * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or - * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just - * entered the kernel for a system call. Full user register state is - * available here. Changing the values in @regs can affect the system - * call number and arguments to be tried. It is safe to block here, - * preventing the system call from beginning. - * - * Returns zero normally, or nonzero if the calling arch code should abort - * the system call. That must prevent normal entry so no system call is - * made. If @task ever returns to user mode after this, its register state - * is unspecified, but should be something harmless like an %ENOSYS error - * return. It should preserve enough information so that syscall_rollback() - * can work (see asm-generic/syscall.h). - * - * Called without locks, just after entering kernel mode. - */ -static inline __must_check int tracehook_report_syscall_entry( - struct pt_regs *regs) -{ - return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); -} - -/** - * tracehook_report_syscall_exit - task has just finished a system call - * @regs: user register state of current task - * @step: nonzero if simulating single-step or block-step - * - * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when - * the current task has just finished an attempted system call. Full - * user register state is available here. It is safe to block here, - * preventing signals from being processed. - * - * If @step is nonzero, this report is also in lieu of the normal - * trap that would follow the system call instruction because - * user_enable_block_step() or user_enable_single_step() was used. - * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. - * - * Called without locks, just before checking for pending signals. 
- */ -static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) -{ - if (step) - user_single_step_report(regs); - else - ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); -} - /** * tracehook_signal_handler - signal handler setup is complete * @stepping: nonzero if debugger single-step or block-step in use diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 3747bf816f9a..b7af92e07d1f 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by tracehook_report_syscall_* to describe the current syscall-stop. + * by ptrace_report_syscall_* to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From 0cfcb2b9ef48bbcaf5d43b9f1893f63a938e8176 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:00:55 -0600 Subject: ptrace: Remove arch_syscall_{enter,exit}_tracehook These functions are alwasy one-to-one wrappers around ptrace_report_syscall_entry and ptrace_report_syscall_exit. So directly call the functions they are wrapping instead. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-4-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/entry-common.h | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a670e9fba7a9..9efbdda61f7a 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -79,26 +79,6 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs); static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} #endif -/** - * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry() - * @regs: Pointer to currents pt_regs - * - * Returns: 0 on success or an error code to skip the syscall. - * - * Defaults to tracehook_report_syscall_entry(). Can be replaced by - * architecture specific code. - * - * Invoked from syscall_enter_from_user_mode() - */ -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs); - -#ifndef arch_syscall_enter_tracehook -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) -{ - return ptrace_report_syscall_entry(regs); -} -#endif - /** * enter_from_user_mode - Establish state when coming from user mode * @@ -157,7 +137,7 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * It handles the following work items: * * 1) syscall_work flag dependent invocations of - * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() + * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); @@ -279,25 +259,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } */ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); -/** - * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() - * @regs: Pointer to currents pt_regs - * @step: Indicator for single step - * - * Defaults to tracehook_report_syscall_exit(). Can be replaced by - * architecture specific code. 
- * - * Invoked from syscall_exit_to_user_mode() - */ -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); - -#ifndef arch_syscall_exit_tracehook -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) -{ - ptrace_report_syscall_exit(regs, step); -} -#endif - /** * exit_to_user_mode - Fixup state when exiting to user mode * @@ -347,7 +308,7 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * - rseq syscall exit * - audit * - syscall tracing - * - tracehook (single stepping) + * - ptrace (single stepping) * * 2) Preparatory work * - Exit to user mode loop (common TIF handling). Invokes -- cgit v1.2.3 From c145137dc990fd67b52fbc52faae5ba46f168cca Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:04:27 -0600 Subject: ptrace: Remove tracehook_signal_handler The two line function tracehook_signal_handler is only called from signal_delivered. Expand it inline in signal_delivered and remove it. Just to make it easier to understand what is going on. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-5-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/tracehook.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 819e82ac09bd..b77bf4917196 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,23 +52,6 @@ struct linux_binprm; -/** - * tracehook_signal_handler - signal handler setup is complete - * @stepping: nonzero if debugger single-step or block-step in use - * - * Called by the arch code after a signal handler has been set up. - * Register and stack state reflects the user handler about to run. - * Signal mask changes have already been made. - * - * Called without locks, shortly before returning to user mode - * (or handling more signals). - */ -static inline void tracehook_signal_handler(int stepping) -{ - if (stepping) - ptrace_notify(SIGTRAP); -} - /** * set_notify_resume - cause tracehook_notify_resume() to be called * @task: task that will call tracehook_notify_resume() -- cgit v1.2.3 From 8ca07e17c9dd4c4afcb4a3f2ea8f0a0d41c0f982 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Jan 2022 13:55:47 -0600 Subject: task_work: Remove unnecessary include from posix_timers.h Break a header file circular dependency by removing the unnecessary include of task_work.h from posix_timers.h. sched.h -> posix-timers.h posix-timers.h -> task_work.h task_work.h -> sched.h Add missing includes of task_work.h to: arch/x86/mm/tlb.c kernel/time/posix-cpu-timers.c Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-6-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/posix-timers.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..83539bb2f023 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -6,7 +6,6 @@ #include #include #include -#include struct kernel_siginfo; struct task_struct; -- cgit v1.2.3 From 7f62d40d9cb50fd146fe8ff071f98fa3c1855083 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 08:52:41 -0600 Subject: task_work: Introduce task_work_pending Wrap the test of task->task_works in a helper function to make it clear what is being tested. 
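(Editor's illustration, not part of this patch: a minimal sketch of the call-site pattern the new helper enables, using only names introduced by the patch.)

	/* open-coded test, as found at the existing call sites */
	if (current->task_works)
		task_work_run();

	/* with the new helper */
	if (task_work_pending(current))
		task_work_run();
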
All of the other readers of task->task_work use READ_ONCE and this is even necessary on current as other processes can update task->task_work. So for consistency I have added READ_ONCE into task_work_pending. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-7-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/task_work.h | 5 +++++ include/linux/tracehook.h | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 5b8a93f288bb..897494b597ba 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -19,6 +19,11 @@ enum task_work_notify_mode { TWA_SIGNAL, }; +static inline bool task_work_pending(struct task_struct *task) +{ + return READ_ONCE(task->task_works); +} + int task_work_add(struct task_struct *task, struct callback_head *twork, enum task_work_notify_mode mode); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b77bf4917196..fa834a22e86e 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -90,7 +90,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) * hlist_add_head(task->task_works); */ smp_mb__after_atomic(); - if (unlikely(current->task_works)) + if (unlikely(task_work_pending(current))) task_work_run(); #ifdef CONFIG_KEYS_REQUEST_CACHE @@ -115,7 +115,7 @@ static inline void tracehook_notify_signal(void) { clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); - if (current->task_works) + if (task_work_pending(current)) task_work_run(); } -- cgit v1.2.3 From bcbb7bf6ccde7cb969a5642879832bc84ebf06a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 12:59:35 -0700 Subject: io_uring: allow submissions to continue on error By default, io_uring will stop submitting a batch of requests if we run into an error submitting a request. This isn't strictly necessary, as the error result is passed out-of-band via a CQE anyway. And it can be a bit confusing for some applications. Provide a way to setup a ring that will continue submitting on error, when the error CQE has been posted. There's still one case that will break out of submission. If we fail allocating a request, then we'll still return -ENOMEM. We could in theory post a CQE for that condition too even if we never got a request. Leave that for a potential followup. Reported-by: Dylan Yudaken Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8bd4bfdd9a89..d2be4eb22008 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -101,6 +101,7 @@ enum { #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ enum { IORING_OP_NOP, -- cgit v1.2.3 From 3b5d4ddf8fe1f60082513f94bae586ac80188a03 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Mar 2022 01:04:50 -0800 Subject: bpf: net: Remove TC_AT_INGRESS_OFFSET and SKB_MONO_DELIVERY_TIME_OFFSET macro This patch removes the TC_AT_INGRESS_OFFSET and SKB_MONO_DELIVERY_TIME_OFFSET macros. Instead, PKT_VLAN_PRESENT_OFFSET is used because all of them are at the same offset. 
Comment is added to make it clear that changing the position of tc_at_ingress or mono_delivery_time will require to adjust the defined macros. The earlier discussion can be found here: https://lore.kernel.org/bpf/419d994e-ff61-7c11-0ec7-11fefcb0186e@iogearbox.net/ Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220309090450.3710955-1-kafai@fb.com --- include/linux/skbuff.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2be263184d1e..7b0cb10e70cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -960,10 +960,10 @@ struct sk_buff { __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; - __u8 mono_delivery_time:1; + __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; + __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; @@ -1062,7 +1062,9 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present around you also must adapt these constants */ +/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time + * around, you also must adapt these constants. + */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_VLAN_PRESENT_BIT 7 #define TC_AT_INGRESS_MASK (1 << 0) @@ -1073,8 +1075,6 @@ struct sk_buff { #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) -#define TC_AT_INGRESS_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) -#define SKB_MONO_DELIVERY_TIME_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) #ifdef __KERNEL__ /* -- cgit v1.2.3 From 9bb984f28d5bcb917d35d930fcfb89f90f9449fd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Mar 2022 01:05:09 -0800 Subject: bpf: Remove BPF_SKB_DELIVERY_TIME_NONE and rename s/delivery_time_/tstamp_/ This patch is to simplify the uapi bpf.h regarding to the tstamp type and use a similar way as the kernel to describe the value stored in __sk_buff->tstamp. My earlier thought was to avoid describing the semantic and clock base for the rcv timestamp until there is more clarity on the use case, so the __sk_buff->delivery_time_type naming instead of __sk_buff->tstamp_type. With some thoughts, it can reuse the UNSPEC naming. This patch first removes BPF_SKB_DELIVERY_TIME_NONE and also rename BPF_SKB_DELIVERY_TIME_UNSPEC to BPF_SKB_TSTAMP_UNSPEC and BPF_SKB_DELIVERY_TIME_MONO to BPF_SKB_TSTAMP_DELIVERY_MONO. The semantic of BPF_SKB_TSTAMP_DELIVERY_MONO is the same: __sk_buff->tstamp has delivery time in mono clock base. BPF_SKB_TSTAMP_UNSPEC means __sk_buff->tstamp has the (rcv) tstamp at ingress and the delivery time at egress. At egress, the clock base could be found from skb->sk->sk_clockid. __sk_buff->tstamp == 0 naturally means NONE, so NONE is not needed. With BPF_SKB_TSTAMP_UNSPEC for the rcv tstamp at ingress, the __sk_buff->delivery_time_type is also renamed to __sk_buff->tstamp_type which was also suggested in the earlier discussion: https://lore.kernel.org/bpf/b181acbe-caf8-502d-4b7b-7d96b9fc5d55@iogearbox.net/ The above will then make __sk_buff->tstamp and __sk_buff->tstamp_type the same as its kernel skb->tstamp and skb->mono_delivery_time counter part. 
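(Editor's illustration, not part of this patch: a minimal tc egress program sketch using the renamed UAPI names, assuming libbpf-style headers and a uapi linux/bpf.h that already carries this change; the renamed helper bpf_skb_set_tstamp is covered further below.)

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int egress_set_mono_tstamp(struct __sk_buff *skb)
	{
		/* tstamp_type replaces the old delivery_time_type field */
		if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC) {
			/*
			 * A non-zero tstamp must be paired with
			 * BPF_SKB_TSTAMP_DELIVERY_MONO, the only type kept
			 * across bpf_redirect_*(); only IPv4/IPv6 packets
			 * are accepted by the helper.
			 */
			bpf_skb_set_tstamp(skb, bpf_ktime_get_ns(),
					   BPF_SKB_TSTAMP_DELIVERY_MONO);
		}
		return 0; /* TC_ACT_OK */
	}

	char LICENSE[] SEC("license") = "GPL";
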
The internal kernel function bpf_skb_convert_dtime_type_read() is then renamed to bpf_skb_convert_tstamp_type_read() and it can be simplified with the BPF_SKB_DELIVERY_TIME_NONE gone. A BPF_ALU32_IMM(BPF_AND) insn is also saved by using BPF_JMP32_IMM(BPF_JSET). The bpf helper bpf_skb_set_delivery_time() is also renamed to bpf_skb_set_tstamp(). The arg name is changed from dtime to tstamp also. It only allows setting tstamp 0 for BPF_SKB_TSTAMP_UNSPEC and it could be relaxed later if there is use case to change mono delivery time to non mono. prog->delivery_time_access is also renamed to prog->tstamp_type_access. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220309090509.3712315-1-kafai@fb.com --- include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9bf26307247f..05ed9bd31b45 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -573,7 +573,7 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - delivery_time_access:1; /* Accessed __sk_buff->delivery_time_type */ + tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc23020b638d..d288a0a9f797 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5090,23 +5090,22 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. * - * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) * Description - * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also - * change the __sk_buff->delivery_time_type to *dtime_type*. + * Change the __sk_buff->tstamp_type to *tstamp_type* + * and set *tstamp* to the __sk_buff->tstamp together. * - * When setting a delivery time (non zero *dtime*) to - * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* - * is supported. It is the only delivery_time_type that will be - * kept after bpf_redirect_*(). - * - * If there is no need to change the __sk_buff->delivery_time_type, - * the delivery time can be directly written to __sk_buff->tstamp + * If there is no need to change the __sk_buff->tstamp_type, + * the tstamp value can be directly written to __sk_buff->tstamp * instead. * - * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE - * can be used to clear any delivery time stored in - * __sk_buff->tstamp. + * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that + * will be kept during bpf_redirect_*(). A non zero + * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO + * *tstamp_type*. + * + * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used + * with a zero *tstamp*. * * Only IPv4 and IPv6 skb->protocol are supported. * @@ -5119,7 +5118,7 @@ union bpf_attr { * Return * 0 on success. 
* **-EINVAL** for invalid input - * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol + * **-EOPNOTSUPP** for unsupported protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,7 +5313,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ - FN(skb_set_delivery_time), \ + FN(skb_set_tstamp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5505,9 +5504,12 @@ union { \ } __attribute__((aligned(8))) enum { - BPF_SKB_DELIVERY_TIME_NONE, - BPF_SKB_DELIVERY_TIME_UNSPEC, - BPF_SKB_DELIVERY_TIME_MONO, + BPF_SKB_TSTAMP_UNSPEC, + BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ + /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, + * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC + * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + */ }; /* user accessible mirror of in-kernel sk_buff. @@ -5550,7 +5552,7 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u8 delivery_time_type; + __u8 tstamp_type; __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 58617014405ad5c9f94f464444f4972dabb71ca7 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 10 Mar 2022 23:53:35 +0800 Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup() Fix the descriptions of the return values of helper bpf_current_task_under_cgroup(). Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Hengqi Chen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220310155335.1278783-1-hengqi.chen@gmail.com --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d288a0a9f797..e9978a916c3e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2302,8 +2302,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -- cgit v1.2.3 From 26183cfe478c1d1d5cd1e3920a4b2c5b1980849d Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Tue, 8 Mar 2022 22:25:44 -0800 Subject: net: phy: correct spelling error of media in documentation The header file incorrectly referenced "median-independant interface" instead of media. Correct this typo. 
Signed-off-by: Colin Foster Fixes: 4069a572d423 ("net: phy: Document core PHY structures") Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20220309062544.3073-1-colin.foster@in-advantage.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6de8d7a90d78..8fa70ba063a5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -87,8 +87,8 @@ extern const int phy_10gbit_features_array[1]; * * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined - * @PHY_INTERFACE_MODE_MII: Median-independent interface - * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface + * @PHY_INTERFACE_MODE_MII: Media-independent interface + * @PHY_INTERFACE_MODE_GMII: Gigabit media-independent interface * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface -- cgit v1.2.3 From 8ba62d37949e248c698c26e0d82d72fda5d33ebf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 09:51:14 -0600 Subject: task_work: Call tracehook_notify_signal from get_signal on all architectures Always handle TIF_NOTIFY_SIGNAL in get_signal. With commit 35d0b389f3b2 ("task_work: unconditionally run task_work from get_signal()") always calling task_work_run all of the work of tracehook_notify_signal is already happening except clearing TIF_NOTIFY_SIGNAL. Factor clear_notify_signal out of tracehook_notify_signal and use it in get_signal so that get_signal only needs one call of task_work_run. To keep the semantics in sync update xfer_to_guest_mode_work (which does not call get_signal) to call tracehook_notify_signal if either _TIF_SIGPENDING or _TIF_NOTIFY_SIGNAL. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-8-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/entry-common.h | 2 +- include/linux/tracehook.h | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 9efbdda61f7a..3537fd25f14e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -257,7 +257,7 @@ static __always_inline void arch_exit_to_user_mode(void) { } * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); +void arch_do_signal_or_restart(struct pt_regs *regs); /** * exit_to_user_mode - Fixup state when exiting to user mode diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index fa834a22e86e..b44a7820c468 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -106,6 +106,12 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) rseq_handle_notify_resume(NULL, regs); } +static inline void clear_notify_signal(void) +{ + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); +} + /* * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. 
This * is currently used by TWA_SIGNAL based task_work, which requires breaking @@ -113,8 +119,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { - clear_thread_flag(TIF_NOTIFY_SIGNAL); - smp_mb__after_atomic(); + clear_notify_signal(); if (task_work_pending(current)) task_work_run(); } -- cgit v1.2.3 From 7c5d8fa6fbb12a3f0eefe8762bfede508e147cb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 11:18:54 -0600 Subject: task_work: Decouple TIF_NOTIFY_SIGNAL and task_work There are a small handful of reasons besides pending signals that the kernel might want to break out of interruptible sleeps. The flag TIF_NOTIFY_SIGNAL and the helpers that set and clear TIF_NOTIFY_SIGNAL provide that the infrastructure for breaking out of interruptible sleeps and entering the return to user space slow path for those cases. Expand tracehook_notify_signal inline in it's callers and remove it, which makes clear that TIF_NOTIFY_SIGNAL and task_work are separate concepts. Update the comment on set_notify_signal to more accurately describe it's purpose. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-9-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/tracehook.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b44a7820c468..e5d676e841e3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -113,19 +113,8 @@ static inline void clear_notify_signal(void) } /* - * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This - * is currently used by TWA_SIGNAL based task_work, which requires breaking - * wait loops to ensure that task_work is noticed and run. - */ -static inline void tracehook_notify_signal(void) -{ - clear_notify_signal(); - if (task_work_pending(current)) - task_work_run(); -} - -/* - * Called when we have work to process from exit_to_user_mode_loop() + * Called to break out of interruptible wait loops, and enter the + * exit_to_user_mode_loop(). */ static inline void set_notify_signal(struct task_struct *task) { -- cgit v1.2.3 From 593febb143d17aa2096cd123c9d62b6981eb1d97 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 11:46:54 -0600 Subject: signal: Move set_notify_signal and clear_notify_signal into sched/signal.h The header tracehook.h is no place for code to live. The functions set_notify_signal and clear_notify_signal are not about signals. They are about interruptions that act like signals. The fundamental signal primitives wind up calling set_notify_signal and clear_notify_signal. Which means they need to be maintained with the signal code. Since set_notify_signal and clear_notify_signal must be maintained with the signal subsystem move them into sched/signal.h and claim them as part of the signal subsystem. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-10-ebiederm@xmission.com Signed-off-by: "Eric W. 
Biederman" --- include/linux/sched/signal.h | 17 +++++++++++++++++ include/linux/tracehook.h | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b6ecb9fc4cd2..3c8b34876744 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -349,6 +349,23 @@ extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); +static inline void clear_notify_signal(void) +{ + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); +} + +/* + * Called to break out of interruptible wait loops, and enter the + * exit_to_user_mode_loop(). + */ +static inline void set_notify_signal(struct task_struct *task) +{ + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +} + static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index e5d676e841e3..1b7365aef8da 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -106,21 +106,4 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) rseq_handle_notify_resume(NULL, regs); } -static inline void clear_notify_signal(void) -{ - clear_thread_flag(TIF_NOTIFY_SIGNAL); - smp_mb__after_atomic(); -} - -/* - * Called to break out of interruptible wait loops, and enter the - * exit_to_user_mode_loop(). - */ -static inline void set_notify_signal(struct task_struct *task) -{ - if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && - !wake_up_state(task, TASK_INTERRUPTIBLE)) - kick_process(task); -} - #endif /* */ -- cgit v1.2.3 From d3c51a0c8944e3a5bba458358b4c1f9ae2de0133 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Jan 2022 14:36:58 -0600 Subject: resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume Every architecture defines TIF_NOTIFY_RESUME so remove the unnecessary ifdef. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-11-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/tracehook.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 1b7365aef8da..946404ebe10b 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -63,10 +63,8 @@ struct linux_binprm; */ static inline void set_notify_resume(struct task_struct *task) { -#ifdef TIF_NOTIFY_RESUME if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) kick_process(task); -#endif } /** -- cgit v1.2.3 From 03248addadf1a5ef0a03cbcd5ec905b49adb9658 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 12:20:45 -0600 Subject: resume_user_mode: Move to resume_user_mode.h Move set_notify_resume and tracehook_notify_resume into resume_user_mode.h. While doing that rename tracehook_notify_resume to resume_user_mode_work. Update all of the places that included tracehook.h for these functions to include resume_user_mode.h instead. Update all of the callers of tracehook_notify_resume to call resume_user_mode_work. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-12-ebiederm@xmission.com Signed-off-by: "Eric W. 
Biederman" --- include/linux/entry-kvm.h | 2 +- include/linux/resume_user_mode.h | 64 ++++++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 51 -------------------------------- 3 files changed, 65 insertions(+), 52 deletions(-) create mode 100644 include/linux/resume_user_mode.h (limited to 'include') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 07c878d6e323..6813171afccb 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -3,7 +3,7 @@ #define __LINUX_ENTRYKVM_H #include -#include +#include #include #include #include diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h new file mode 100644 index 000000000000..285189454449 --- /dev/null +++ b/include/linux/resume_user_mode.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LINUX_RESUME_USER_MODE_H +#define LINUX_RESUME_USER_MODE_H + +#include +#include +#include +#include + +/** + * set_notify_resume - cause resume_user_mode_work() to be called + * @task: task that will call resume_user_mode_work() + * + * Calling this arranges that @task will call resume_user_mode_work() + * before returning to user mode. If it's already running in user mode, + * it will enter the kernel and call resume_user_mode_work() soon. + * If it's blocked, it will not be woken. + */ +static inline void set_notify_resume(struct task_struct *task) +{ + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) + kick_process(task); +} + + +/** + * resume_user_mode_work - Perform work before returning to user mode + * @regs: user-mode registers of @current task + * + * This is called when %TIF_NOTIFY_RESUME has been set. Now we are + * about to return to user mode, and the user state in @regs can be + * inspected or adjusted. The caller in arch code has cleared + * %TIF_NOTIFY_RESUME before the call. If the flag gets set again + * asynchronously, this will be called again before we return to + * user mode. + * + * Called without locks. + */ +static inline void resume_user_mode_work(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + /* + * This barrier pairs with task_work_add()->set_notify_resume() after + * hlist_add_head(task->task_works); + */ + smp_mb__after_atomic(); + if (unlikely(task_work_pending(current))) + task_work_run(); + +#ifdef CONFIG_KEYS_REQUEST_CACHE + if (unlikely(current->cached_requested_key)) { + key_put(current->cached_requested_key); + current->cached_requested_key = NULL; + } +#endif + + mem_cgroup_handle_over_high(); + blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); +} + +#endif /* LINUX_RESUME_USER_MODE_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 946404ebe10b..9f6b3fd1880a 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,56 +52,5 @@ struct linux_binprm; -/** - * set_notify_resume - cause tracehook_notify_resume() to be called - * @task: task that will call tracehook_notify_resume() - * - * Calling this arranges that @task will call tracehook_notify_resume() - * before returning to user mode. If it's already running in user mode, - * it will enter the kernel and call tracehook_notify_resume() soon. - * If it's blocked, it will not be woken. 
- */ -static inline void set_notify_resume(struct task_struct *task) -{ - if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) - kick_process(task); -} - -/** - * tracehook_notify_resume - report when about to return to user mode - * @regs: user-mode registers of @current task - * - * This is called when %TIF_NOTIFY_RESUME has been set. Now we are - * about to return to user mode, and the user state in @regs can be - * inspected or adjusted. The caller in arch code has cleared - * %TIF_NOTIFY_RESUME before the call. If the flag gets set again - * asynchronously, this will be called again before we return to - * user mode. - * - * Called without locks. - */ -static inline void tracehook_notify_resume(struct pt_regs *regs) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); - /* - * This barrier pairs with task_work_add()->set_notify_resume() after - * hlist_add_head(task->task_works); - */ - smp_mb__after_atomic(); - if (unlikely(task_work_pending(current))) - task_work_run(); - -#ifdef CONFIG_KEYS_REQUEST_CACHE - if (unlikely(current->cached_requested_key)) { - key_put(current->cached_requested_key); - current->cached_requested_key = NULL; - } -#endif - - mem_cgroup_handle_over_high(); - blkcg_maybe_throttle_current(); - - rseq_handle_notify_resume(NULL, regs); -} #endif /* */ -- cgit v1.2.3 From 355f841a3f8ca980c9682937a5257d3a1f6fc09d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 12:47:08 -0600 Subject: tracehook: Remove tracehook.h Now that all of the definitions have moved out of tracehook.h into ptrace.h, sched/signal.h, resume_user_mode.h there is nothing left in tracehook.h so remove it. Update the few files that were depending upon tracehook.h to bring in definitions to use the headers they need directly. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-13-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/tracehook.h | 56 ----------------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 include/linux/tracehook.h (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h deleted file mode 100644 index 9f6b3fd1880a..000000000000 --- a/include/linux/tracehook.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Tracing hooks - * - * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. - * - * This file defines hook entry points called by core code where - * user tracing/debugging support might need to do something. These - * entry points are called tracehook_*(). Each hook declared below - * has a detailed kerneldoc comment giving the context (locking et - * al) from which it is called, and the meaning of its return value. - * - * Each function here typically has only one call site, so it is ok - * to have some nontrivial tracehook_*() inlines. In all cases, the - * fast path when no tracing is enabled should be very short. - * - * The purpose of this file and the tracehook_* layer is to consolidate - * the interface that the kernel core and arch code uses to enable any - * user debugging or tracing facility (such as ptrace). The interfaces - * here are carefully documented so that maintainers of core and arch - * code do not need to think about the implementation details of the - * tracing facilities. Likewise, maintainers of the tracing code do not - * need to understand all the calling core or arch code in detail, just - * documented circumstances of each call, such as locking conditions. 
- * - * If the calling core code changes so that locking is different, then - * it is ok to change the interface documented here. The maintainer of - * core code changing should notify the maintainers of the tracing code - * that they need to work out the change. - * - * Some tracehook_*() inlines take arguments that the current tracing - * implementations might not necessarily use. These function signatures - * are chosen to pass in all the information that is on hand in the - * caller and might conceivably be relevant to a tracer, so that the - * core code won't have to be updated when tracing adds more features. - * If a call site changes so that some of those parameters are no longer - * already on hand without extra work, then the tracehook_* interface - * can change so there is no make-work burden on the core code. The - * maintainer of core code changing should notify the maintainers of the - * tracing code that they need to work out the change. - */ - -#ifndef _LINUX_TRACEHOOK_H -#define _LINUX_TRACEHOOK_H 1 - -#include -#include -#include -#include -#include -#include -struct linux_binprm; - - - -#endif /* */ -- cgit v1.2.3 From 6853fece628c5a968e4c264d26a4f7750de42199 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 9 Mar 2022 14:23:02 -0800 Subject: dt-bindings: clk: cleanup comments For spdx, first line /* */ for *.h, change tab to space Replacements devider to divider Comunications to Communications periphrals to peripherals supportted to supported wich to which Documentatoin to Documentation Signed-off-by: Tom Rix Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220309222302.1114561-1-trix@redhat.com --- include/dt-bindings/clock/alphascale,asm9260.h | 2 +- include/dt-bindings/clock/axis,artpec6-clkctrl.h | 2 +- include/dt-bindings/clock/boston-clock.h | 3 +-- include/dt-bindings/clock/marvell,mmp2.h | 4 ++-- include/dt-bindings/clock/marvell,pxa168.h | 4 ++-- include/dt-bindings/clock/marvell,pxa910.h | 4 ++-- include/dt-bindings/clock/nuvoton,npcm7xx-clock.h | 2 +- include/dt-bindings/clock/stm32fx-clock.h | 4 ++-- include/dt-bindings/clock/stratix10-clock.h | 2 +- 9 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/alphascale,asm9260.h b/include/dt-bindings/clock/alphascale,asm9260.h index d3871c63308b..f53f8b16883d 100644 --- a/include/dt-bindings/clock/alphascale,asm9260.h +++ b/include/dt-bindings/clock/alphascale,asm9260.h @@ -55,7 +55,7 @@ #define CLKID_AHB_I2S1 45 #define CLKID_AHB_MAC1 46 -/* devider */ +/* divider */ #define CLKID_SYS_CPU 47 #define CLKID_SYS_AHB 48 #define CLKID_SYS_I2S0M 49 diff --git a/include/dt-bindings/clock/axis,artpec6-clkctrl.h b/include/dt-bindings/clock/axis,artpec6-clkctrl.h index b1f4971642e6..14e424a7c08c 100644 --- a/include/dt-bindings/clock/axis,artpec6-clkctrl.h +++ b/include/dt-bindings/clock/axis,artpec6-clkctrl.h @@ -2,7 +2,7 @@ /* * ARTPEC-6 clock controller indexes * - * Copyright 2016 Axis Comunications AB. + * Copyright 2016 Axis Communications AB. 
*/ #ifndef DT_BINDINGS_CLK_ARTPEC6_CLKCTRL_H diff --git a/include/dt-bindings/clock/boston-clock.h b/include/dt-bindings/clock/boston-clock.h index a6f009821137..38140fa87b09 100644 --- a/include/dt-bindings/clock/boston-clock.h +++ b/include/dt-bindings/clock/boston-clock.h @@ -1,7 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2016 Imagination Technologies - * - * SPDX-License-Identifier: GPL-2.0 */ #ifndef __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__ diff --git a/include/dt-bindings/clock/marvell,mmp2.h b/include/dt-bindings/clock/marvell,mmp2.h index 87f5ad5df72f..f0819d66b230 100644 --- a/include/dt-bindings/clock/marvell,mmp2.h +++ b/include/dt-bindings/clock/marvell,mmp2.h @@ -32,7 +32,7 @@ #define MMP2_CLK_I2S0 31 #define MMP2_CLK_I2S1 32 -/* apb periphrals */ +/* apb peripherals */ #define MMP2_CLK_TWSI0 60 #define MMP2_CLK_TWSI1 61 #define MMP2_CLK_TWSI2 62 @@ -60,7 +60,7 @@ #define MMP3_CLK_THERMAL2 84 #define MMP3_CLK_THERMAL3 85 -/* axi periphrals */ +/* axi peripherals */ #define MMP2_CLK_SDH0 101 #define MMP2_CLK_SDH1 102 #define MMP2_CLK_SDH2 103 diff --git a/include/dt-bindings/clock/marvell,pxa168.h b/include/dt-bindings/clock/marvell,pxa168.h index caf90436b848..db2b41f1b127 100644 --- a/include/dt-bindings/clock/marvell,pxa168.h +++ b/include/dt-bindings/clock/marvell,pxa168.h @@ -23,7 +23,7 @@ #define PXA168_CLK_UART_PLL 27 #define PXA168_CLK_USB_PLL 28 -/* apb periphrals */ +/* apb peripherals */ #define PXA168_CLK_TWSI0 60 #define PXA168_CLK_TWSI1 61 #define PXA168_CLK_TWSI2 62 @@ -45,7 +45,7 @@ #define PXA168_CLK_SSP4 78 #define PXA168_CLK_TIMER 79 -/* axi periphrals */ +/* axi peripherals */ #define PXA168_CLK_DFC 100 #define PXA168_CLK_SDH0 101 #define PXA168_CLK_SDH1 102 diff --git a/include/dt-bindings/clock/marvell,pxa910.h b/include/dt-bindings/clock/marvell,pxa910.h index 7bf46238946e..c9018ab354d0 100644 --- a/include/dt-bindings/clock/marvell,pxa910.h +++ b/include/dt-bindings/clock/marvell,pxa910.h @@ -23,7 +23,7 @@ #define PXA910_CLK_UART_PLL 27 #define PXA910_CLK_USB_PLL 28 -/* apb periphrals */ +/* apb peripherals */ #define PXA910_CLK_TWSI0 60 #define PXA910_CLK_TWSI1 61 #define PXA910_CLK_TWSI2 62 @@ -43,7 +43,7 @@ #define PXA910_CLK_TIMER0 76 #define PXA910_CLK_TIMER1 77 -/* axi periphrals */ +/* axi peripherals */ #define PXA910_CLK_DFC 100 #define PXA910_CLK_SDH0 101 #define PXA910_CLK_SDH1 102 diff --git a/include/dt-bindings/clock/nuvoton,npcm7xx-clock.h b/include/dt-bindings/clock/nuvoton,npcm7xx-clock.h index f21522605b94..3e0a9b68933d 100644 --- a/include/dt-bindings/clock/nuvoton,npcm7xx-clock.h +++ b/include/dt-bindings/clock/nuvoton,npcm7xx-clock.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Nuvoton NPCM7xx Clock Generator binding - * clock binding number for all clocks supportted by nuvoton,npcm7xx-clk + * clock binding number for all clocks supported by nuvoton,npcm7xx-clk * * Copyright (C) 2018 Nuvoton Technologies tali.perry@nuvoton.com * diff --git a/include/dt-bindings/clock/stm32fx-clock.h b/include/dt-bindings/clock/stm32fx-clock.h index 1cc89c548578..e5dad050d518 100644 --- a/include/dt-bindings/clock/stm32fx-clock.h +++ b/include/dt-bindings/clock/stm32fx-clock.h @@ -7,10 +7,10 @@ */ /* - * List of clocks wich are not derived from system clock (SYSCLOCK) + * List of clocks which are not derived from system clock (SYSCLOCK) * * The index of these clocks is the secondary index of DT bindings - * (see Documentatoin/devicetree/bindings/clock/st,stm32-rcc.txt) + * (see 
Documentation/devicetree/bindings/clock/st,stm32-rcc.txt) * * e.g: ; diff --git a/include/dt-bindings/clock/stratix10-clock.h b/include/dt-bindings/clock/stratix10-clock.h index 08b98e20b7cc..636498f9e08e 100644 --- a/include/dt-bindings/clock/stratix10-clock.h +++ b/include/dt-bindings/clock/stratix10-clock.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017, Intel Corporation */ -- cgit v1.2.3 From 458dad7cac03add9333be9dac803d63ab9587847 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 26 Jan 2022 18:39:49 +0100 Subject: dt-bindings: clock: Add JH7100 audio clock definitions Add all clock outputs for the StarFive JH7100 audio clock generator. Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20220126173953.1016706-4-kernel@esmil.dk Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/starfive-jh7100-audio.h | 41 +++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/dt-bindings/clock/starfive-jh7100-audio.h (limited to 'include') diff --git a/include/dt-bindings/clock/starfive-jh7100-audio.h b/include/dt-bindings/clock/starfive-jh7100-audio.h new file mode 100644 index 000000000000..fbb4eae6572b --- /dev/null +++ b/include/dt-bindings/clock/starfive-jh7100-audio.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2021 Emil Renner Berthing + */ + +#ifndef __DT_BINDINGS_CLOCK_STARFIVE_JH7100_AUDIO_H__ +#define __DT_BINDINGS_CLOCK_STARFIVE_JH7100_AUDIO_H__ + +#define JH7100_AUDCLK_ADC_MCLK 0 +#define JH7100_AUDCLK_I2S1_MCLK 1 +#define JH7100_AUDCLK_I2SADC_APB 2 +#define JH7100_AUDCLK_I2SADC_BCLK 3 +#define JH7100_AUDCLK_I2SADC_BCLK_N 4 +#define JH7100_AUDCLK_I2SADC_LRCLK 5 +#define JH7100_AUDCLK_PDM_APB 6 +#define JH7100_AUDCLK_PDM_MCLK 7 +#define JH7100_AUDCLK_I2SVAD_APB 8 +#define JH7100_AUDCLK_SPDIF 9 +#define JH7100_AUDCLK_SPDIF_APB 10 +#define JH7100_AUDCLK_PWMDAC_APB 11 +#define JH7100_AUDCLK_DAC_MCLK 12 +#define JH7100_AUDCLK_I2SDAC_APB 13 +#define JH7100_AUDCLK_I2SDAC_BCLK 14 +#define JH7100_AUDCLK_I2SDAC_BCLK_N 15 +#define JH7100_AUDCLK_I2SDAC_LRCLK 16 +#define JH7100_AUDCLK_I2S1_APB 17 +#define JH7100_AUDCLK_I2S1_BCLK 18 +#define JH7100_AUDCLK_I2S1_BCLK_N 19 +#define JH7100_AUDCLK_I2S1_LRCLK 20 +#define JH7100_AUDCLK_I2SDAC16K_APB 21 +#define JH7100_AUDCLK_APB0_BUS 22 +#define JH7100_AUDCLK_DMA1P_AHB 23 +#define JH7100_AUDCLK_USB_APB 24 +#define JH7100_AUDCLK_USB_LPM 25 +#define JH7100_AUDCLK_USB_STB 26 +#define JH7100_AUDCLK_APB_EN 27 +#define JH7100_AUDCLK_VAD_MEM 28 + +#define JH7100_AUDCLK_END 29 + +#endif /* __DT_BINDINGS_CLOCK_STARFIVE_JH7100_AUDIO_H__ */ -- cgit v1.2.3 From 174b16946e39ebd369097e0f773536c91a8c1a4c Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 2 Mar 2022 12:13:58 +0100 Subject: bpf-lsm: Introduce new helper bpf_ima_file_hash() ima_file_hash() has been modified to calculate the measurement of a file on demand, if it has not been already performed by IMA or the measurement is not fresh. For compatibility reasons, ima_inode_hash() remains unchanged. Keep the same approach in eBPF and introduce the new helper bpf_ima_file_hash() to take advantage of the modified behavior of ima_file_hash(). 
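A rough usage sketch (not taken from the patch) of how a sleepable BPF LSM program might call the new helper; the hook choice, buffer size and program name are assumptions made purely for illustration, and an updated bpf_helpers with the new helper definition is assumed:

/*
 * Illustrative sketch only: a sleepable BPF LSM program reading an IMA
 * hash with the new helper. Hook, buffer size and naming are assumptions.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("lsm.s/bprm_committed_creds")	/* sleepable hook: the hash may be computed on demand */
void BPF_PROG(measure_exec, struct linux_binprm *bprm)
{
	u8 digest[64] = {};	/* large enough for SHA-512 */
	long algo;

	/* returns the hash_algo used on success, a negative error otherwise */
	algo = bpf_ima_file_hash(bprm->file, digest, sizeof(digest));
	if (algo < 0)
		return;

	/* e.g. forward (algo, digest) to user space via a ring buffer */
}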
Signed-off-by: Roberto Sassu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220302111404.193900-4-roberto.sassu@huawei.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e9978a916c3e..99fab54ae9c0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5119,6 +5119,16 @@ union bpf_attr { * 0 on success. * **-EINVAL** for invalid input * **-EOPNOTSUPP** for unsupported protocol + * + * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) + * Description + * Returns a calculated IMA hash of the *file*. + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,6 +5324,7 @@ union bpf_attr { FN(xdp_store_bytes), \ FN(copy_from_user_task), \ FN(skb_set_tstamp), \ + FN(ima_file_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 6789ab9668d920d7be636cb994d85be9a816f9d5 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 10 Mar 2022 13:16:55 -0800 Subject: compiler_types: Refactor the use of btf_type_tag attribute. Previous patches have introduced the compiler attribute btf_type_tag for __user and __percpu. The availability of this attribute depends on some CONFIGs and compiler support. This patch refactors the use of btf_type_tag by introducing BTF_TYPE_TAG, which hides all the dependencies. No functional change. Suggested-by: Andrii Nakryiko Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220310211655.3173786-1-haoluo@google.com --- include/linux/compiler_types.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index b9a8ae9440c7..1bc760ba400c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,6 +4,13 @@ #ifndef __ASSEMBLY__ +#if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value))) +#else +# define BTF_TYPE_TAG(value) /* nothing */ +#endif + #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) @@ -31,19 +38,11 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __kernel # ifdef STRUCTLEAK_PLUGIN # define __user __attribute__((user)) -# elif defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ - __has_attribute(btf_type_tag) -# define __user __attribute__((btf_type_tag("user"))) # else -# define __user +# define __user BTF_TYPE_TAG(user) # endif # define __iomem -# if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ - __has_attribute(btf_type_tag) -# define __percpu __attribute__((btf_type_tag("percpu"))) -# else -# define __percpu -# endif +# define __percpu BTF_TYPE_TAG(percpu) # define __rcu # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 -- cgit v1.2.3 From 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 9 Mar 2022 23:20:33 +0100 Subject: net: openvswitch: fix uAPI incompatibility with existing 
user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. [1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Acked-by: Aaron Conole Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9d1710f20505..ce3e1738d427 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,11 +351,21 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ - OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ -#ifdef __KERNEL__ - OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ -#endif + /* User space decided to squat on types 29 and 30. They are defined + * below, but should not be sent to the kernel. + * + * WARNING: No new types should be added unless they are defined + * for both kernel and user space (no 'ifdef's). It's hard + * to keep compatibility otherwise. + */ + OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */ + OVS_KEY_ATTR_ND_EXTENSIONS, /* IPv6 Neighbor Discovery extensions */ + + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info. + * For in-kernel use only. 
+ */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ __OVS_KEY_ATTR_MAX }; -- cgit v1.2.3 From 271907ee2f29cd1078fd219f0778fd824fb1971c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 17 Jan 2022 15:14:44 +0200 Subject: net/mlx5: Query the maximum MCIA register read size from firmware The MCIA register supports either 12 or 32 dwords, use the correct value by querying the capability from the MCAM register. Signed-off-by: Gal Pressman Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- include/linux/mlx5/port.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 318fae4b3560..745107ff681d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9691,7 +9691,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x6a]; + u8 reserved_at_0[0x5d]; + u8 mcia_32dwords[0x1]; + u8 reserved_at_5e[0xc]; u8 reset_state[0x1]; u8 ptpcyc2realtime_modify[0x1]; u8 reserved_at_6c[0x2]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 77ea4f9c5265..402413b3e914 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -56,7 +56,6 @@ enum mlx5_an_status { MLX5_AN_LINK_DOWN = 4, }; -#define MLX5_EEPROM_MAX_BYTES 32 #define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 #define MLX5_I2C_ADDR_HIGH 0x51 -- cgit v1.2.3 From fcb610a86c53dfcfbb2aa62e704481112752f367 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 17 Jan 2022 15:53:06 +0200 Subject: net/mlx5: Parse module mapping using mlx5_ifc The assumption that the first byte in the module mapping dword is the module number shouldn't be hard-coded in the driver, but come from mlx5_ifc structs. While at it, fix the incorrect width for the 'rx_lane' and 'tx_lane' fields. Signed-off-by: Gal Pressman Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 8 ++++---- include/linux/mlx5/port.h | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 745107ff681d..91b7f730ed91 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9888,10 +9888,10 @@ struct mlx5_ifc_pcmr_reg_bits { }; struct mlx5_ifc_lane_2_module_mapping_bits { - u8 reserved_at_0[0x6]; - u8 rx_lane[0x2]; - u8 reserved_at_8[0x6]; - u8 tx_lane[0x2]; + u8 reserved_at_0[0x4]; + u8 rx_lane[0x4]; + u8 reserved_at_8[0x4]; + u8 tx_lane[0x4]; u8 reserved_at_10[0x8]; u8 module[0x8]; }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 402413b3e914..28a928b0684b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -56,7 +56,6 @@ enum mlx5_an_status { MLX5_AN_LINK_DOWN = 4, }; -#define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 -- cgit v1.2.3 From 286f950545e0d9c8aa802cbfc9676860bbc49179 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 16 Feb 2022 15:21:58 +0530 Subject: coresight: Drop unused 'none' enum value for each component CORESIGHT_DEV_TYPE_NONE/CORESIGHT_DEV_SUBTYPE_XXXX_NONE values are not used any where. Actual enumeration can start from 0. Just drop these unused enum values. 
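To see why the *_NONE values are dead weight, consider how a driver describes itself to the coresight core; a hypothetical sink driver (names invented for illustration, only the type/subtype fields are the point here) always supplies a concrete type and subtype:

/*
 * Hypothetical sink registration, for illustration only: a concrete
 * type/subtype is always supplied, so the *_NONE placeholders were unused.
 */
#include <linux/coresight.h>
#include <linux/err.h>

static int example_sink_probe(struct device *dev)
{
	struct coresight_desc desc = {
		.type = CORESIGHT_DEV_TYPE_SINK,
		.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER,
		/* .name, .ops, .pdata and .dev are filled in by a real driver */
	};
	struct coresight_device *csdev;

	csdev = coresight_register(&desc);
	return PTR_ERR_OR_ZERO(csdev);
}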
Cc: Mathieu Poirier Cc: Suzuki K Poulose Cc: Mike Leach Cc: Leo Yan Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1645005118-10561-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Mathieu Poirier Signed-off-by: Suzuki K Poulose --- include/linux/coresight.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 93a2922b7653..9f445f09fcfe 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -36,7 +36,6 @@ extern struct bus_type coresight_bustype; enum coresight_dev_type { - CORESIGHT_DEV_TYPE_NONE, CORESIGHT_DEV_TYPE_SINK, CORESIGHT_DEV_TYPE_LINK, CORESIGHT_DEV_TYPE_LINKSINK, @@ -46,7 +45,6 @@ enum coresight_dev_type { }; enum coresight_dev_subtype_sink { - CORESIGHT_DEV_SUBTYPE_SINK_NONE, CORESIGHT_DEV_SUBTYPE_SINK_PORT, CORESIGHT_DEV_SUBTYPE_SINK_BUFFER, CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM, @@ -54,21 +52,18 @@ enum coresight_dev_subtype_sink { }; enum coresight_dev_subtype_link { - CORESIGHT_DEV_SUBTYPE_LINK_NONE, CORESIGHT_DEV_SUBTYPE_LINK_MERG, CORESIGHT_DEV_SUBTYPE_LINK_SPLIT, CORESIGHT_DEV_SUBTYPE_LINK_FIFO, }; enum coresight_dev_subtype_source { - CORESIGHT_DEV_SUBTYPE_SOURCE_NONE, CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, }; enum coresight_dev_subtype_helper { - CORESIGHT_DEV_SUBTYPE_HELPER_NONE, CORESIGHT_DEV_SUBTYPE_HELPER_CATU, }; -- cgit v1.2.3 From 2916b7a9c7c25ecf9be2f37e567a277e861f8e3f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Tue, 22 Feb 2022 20:36:39 +0530 Subject: nl80211: fix typo of NL80211_IF_TYPE_OCB in documentation It should be NL80211_IFTYPE_OCB instead. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1645542399-4680-1-git-send-email-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98ed52663d6b..0568a79097b8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3259,7 +3259,7 @@ enum nl80211_attrs { * and therefore can't be created in the normal ways, use the * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE * commands to create and destroy one - * @NL80211_IF_TYPE_OCB: Outside Context of a BSS + * @NL80211_IFTYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined -- cgit v1.2.3 From 3af722cb735d212554027ec81e2aa2e6bf1ee34d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Mar 2022 17:12:10 +0100 Subject: powerpc/net: Implement powerpc specific csum_shift() to remove branch Today's implementation of csum_shift() leads to branching based on parity of 'offset' 000002f8 : 2f8: 70 a5 00 01 andi. r5,r5,1 2fc: 41 a2 00 08 beq 304 300: 54 84 c0 3e rotlwi r4,r4,24 304: 7c 63 20 14 addc r3,r3,r4 308: 7c 63 01 94 addze r3,r3 30c: 4e 80 00 20 blr Use first bit of 'offset' directly as input of the rotation instead of branching. 
000002f8 : 2f8: 54 a5 1f 38 rlwinm r5,r5,3,28,28 2fc: 20 a5 00 20 subfic r5,r5,32 300: 5c 84 28 3e rotlw r4,r4,r5 304: 7c 63 20 14 addc r3,r3,r4 308: 7c 63 01 94 addze r3,r3 30c: 4e 80 00 20 blr And change to left shift instead of right shift to skip one more instruction. This has no impact on the final sum. 000002f8 : 2f8: 54 a5 1f 38 rlwinm r5,r5,3,28,28 2fc: 5c 84 28 3e rotlw r4,r4,r5 300: 7c 63 20 14 addc r3,r3,r4 304: 7c 63 01 94 addze r3,r3 308: 4e 80 00 20 blr Seems like only powerpc benefits from a branchless implementation. Other main architectures like ARM or X86 get better code with the generic implementation and its branch. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- include/net/checksum.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 79c67f14c448..6bc783b7a06c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -80,6 +80,7 @@ static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) return csum16_add(csum, ~addend); } +#ifndef HAVE_ARCH_CSUM_SHIFT static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ @@ -87,6 +88,7 @@ static __always_inline __wsum csum_shift(__wsum sum, int offset) return (__force __wsum)ror32((__force u32)sum, 8); return sum; } +#endif static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) -- cgit v1.2.3 From 7d352ccf1e9935b5222ca84e8baeb07a0c8f94b9 Mon Sep 17 00:00:00 2001 From: Youghandhar Chintala Date: Tue, 8 Mar 2022 17:23:24 +0530 Subject: mac80211: Add support to trigger sta disconnect on hardware restart Currently in case of target hardware restart, we just reconfig and re-enable the security keys and enable the network queues to start data traffic back from where it was interrupted. Many ath10k wifi chipsets have sequence numbers for the data packets assigned by firmware and the mac sequence number will restart from zero after target hardware restart leading to mismatch in the sequence number expected by the remote peer vs the sequence number of the frame sent by the target firmware. This mismatch in sequence number will cause out-of-order packets on the remote peer and all the frames sent by the device are dropped until we reach the sequence number which was sent before we restarted the target hardware In order to fix this, we trigger a sta disconnect, in case of target hw restart. After this there will be a fresh connection and thereby avoiding the dropping of frames by remote peer. The right fix would be to pull the entire data path into the host which is not feasible or would need lots of complex changes and will still be inefficient. Tested on ath10k using WCN3990, QCA6174 Signed-off-by: Youghandhar Chintala Link: https://lore.kernel.org/r/20220308115325.5246-2-youghand@codeaurora.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f118b8fe667a..b8e8c82b53aa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6070,6 +6070,16 @@ void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect); */ void ieee80211_resume_disconnect(struct ieee80211_vif *vif); +/** + * ieee80211_hw_restart_disconnect - disconnect from AP after + * hardware restart + * @vif: &struct ieee80211_vif pointer from the add_interface callback. 
+ * + * Instructs mac80211 to disconnect from the AP after + * hardware restart. + */ +void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif); + /** * ieee80211_cqm_rssi_notify - inform a configured connection quality monitoring * rssi threshold triggered -- cgit v1.2.3 From c19bd02e9029f0f75b58f4b8662527da74be8985 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 11 Mar 2022 16:35:28 +0100 Subject: ALSA: hda: Add helper macros for DSP capable devices HDAudio drivers make heavy use of I/O operations. Declare a range of update, read and write helpers similar to those available for HDAudio legacy driver. These macros are used by AVS driver to improve code readability. Signed-off-by: Cezary Rojewski Acked-by: Takashi Iwai Link: https://lore.kernel.org/r/20220311153544.136854-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/hdaudio.h | 3 +++ include/sound/hdaudio_ext.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index 6a90ce405e60..15f15075238d 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -448,6 +449,8 @@ static inline u16 snd_hdac_reg_readw(struct hdac_bus *bus, void __iomem *addr) #define snd_hdac_reg_writel(bus, addr, val) writel(val, addr) #define snd_hdac_reg_readl(bus, addr) readl(addr) +#define snd_hdac_reg_writeq(bus, addr, val) writeq(val, addr) +#define snd_hdac_reg_readq(bus, addr) readq(addr) /* * macros for easy use diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 77123c3e4095..5a538799ff77 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -2,6 +2,8 @@ #ifndef __SOUND_HDAUDIO_EXT_H #define __SOUND_HDAUDIO_EXT_H +#include +#include #include int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev, @@ -143,6 +145,54 @@ void snd_hdac_ext_bus_link_power(struct hdac_device *codec, bool enable); writew(((readw(addr + reg) & ~(mask)) | (val)), \ addr + reg) +#define snd_hdac_adsp_writeb(chip, reg, value) \ + snd_hdac_reg_writeb(chip, (chip)->dsp_ba + (reg), value) +#define snd_hdac_adsp_readb(chip, reg) \ + snd_hdac_reg_readb(chip, (chip)->dsp_ba + (reg)) +#define snd_hdac_adsp_writew(chip, reg, value) \ + snd_hdac_reg_writew(chip, (chip)->dsp_ba + (reg), value) +#define snd_hdac_adsp_readw(chip, reg) \ + snd_hdac_reg_readw(chip, (chip)->dsp_ba + (reg)) +#define snd_hdac_adsp_writel(chip, reg, value) \ + snd_hdac_reg_writel(chip, (chip)->dsp_ba + (reg), value) +#define snd_hdac_adsp_readl(chip, reg) \ + snd_hdac_reg_readl(chip, (chip)->dsp_ba + (reg)) +#define snd_hdac_adsp_writeq(chip, reg, value) \ + snd_hdac_reg_writeq(chip, (chip)->dsp_ba + (reg), value) +#define snd_hdac_adsp_readq(chip, reg) \ + snd_hdac_reg_readq(chip, (chip)->dsp_ba + (reg)) + +#define snd_hdac_adsp_updateb(chip, reg, mask, val) \ + snd_hdac_adsp_writeb(chip, reg, \ + (snd_hdac_adsp_readb(chip, reg) & ~(mask)) | (val)) +#define snd_hdac_adsp_updatew(chip, reg, mask, val) \ + snd_hdac_adsp_writew(chip, reg, \ + (snd_hdac_adsp_readw(chip, reg) & ~(mask)) | (val)) +#define snd_hdac_adsp_updatel(chip, reg, mask, val) \ + snd_hdac_adsp_writel(chip, reg, \ + (snd_hdac_adsp_readl(chip, reg) & ~(mask)) | (val)) +#define snd_hdac_adsp_updateq(chip, reg, mask, val) \ + snd_hdac_adsp_writeq(chip, reg, \ + (snd_hdac_adsp_readq(chip, reg) & ~(mask)) | (val)) + +#define 
snd_hdac_adsp_readb_poll(chip, reg, val, cond, delay_us, timeout_us) \ + readb_poll_timeout((chip)->dsp_ba + (reg), val, cond, \ + delay_us, timeout_us) +#define snd_hdac_adsp_readw_poll(chip, reg, val, cond, delay_us, timeout_us) \ + readw_poll_timeout((chip)->dsp_ba + (reg), val, cond, \ + delay_us, timeout_us) +#define snd_hdac_adsp_readl_poll(chip, reg, val, cond, delay_us, timeout_us) \ + readl_poll_timeout((chip)->dsp_ba + (reg), val, cond, \ + delay_us, timeout_us) +#define snd_hdac_adsp_readq_poll(chip, reg, val, cond, delay_us, timeout_us) \ + readq_poll_timeout((chip)->dsp_ba + (reg), val, cond, \ + delay_us, timeout_us) +#define snd_hdac_stream_readb_poll(strm, reg, val, cond, delay_us, timeout_us) \ + readb_poll_timeout((strm)->sd_addr + AZX_REG_ ## reg, val, cond, \ + delay_us, timeout_us) +#define snd_hdac_stream_readl_poll(strm, reg, val, cond, delay_us, timeout_us) \ + readl_poll_timeout((strm)->sd_addr + AZX_REG_ ## reg, val, cond, \ + delay_us, timeout_us) struct hdac_ext_device; -- cgit v1.2.3 From da0398099a83483014adc509a2845c88ccf672af Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 11 Mar 2022 16:35:29 +0100 Subject: ASoC: Export DAI register and widget ctor and dctor functions To allow for more flexibility i.e. populating component DAIs dynamically during its initialization, without being limited to topology loading procedure, expose snd_soc_register(), snd_soc_dapm_new_dai_widgets() and snd_soc_dapm_free_widget() functions. Allows users to first check available resources e.g. number of PCMs supported by HDAudio codec before allocating the number of DAPM widgets needed. This prevents superfluous objects from being created or allows driver to adjust to situation when resources are limited. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220311153544.136854-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index c3039e97929a..ebb8e7a7fc29 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -429,6 +429,7 @@ struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked( const struct snd_soc_dapm_widget *widget); int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, struct snd_soc_dai *dai); +void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card); void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); -- cgit v1.2.3 From b20dc3c684580ddc07eb48ee3c3dc7597cd5eebf Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:42 +0100 Subject: gtp: Allow to create GTP device without FDs Currently, when the user wants to create GTP device, he has to provide file handles to the sockets created in userspace (IFLA_GTP_FD0, IFLA_GTP_FD1). This behaviour is not ideal, considering the option of adding support for GTP device creation through ip link. Ip link application is not a good place to create such sockets. This patch allows to create GTP device without providing IFLA_GTP_FD0 and IFLA_GTP_FD1 arguments. If the user sets IFLA_GTP_CREATE_SOCKETS attribute, then GTP module takes care of creating UDP sockets by itself. Sockets are created with the commonly known UDP ports used for GTP protocol (GTP0_PORT and GTP1U_PORT). In this case we don't have to provide encap_destroy because no extra deinitialization is needed, everything is covered by udp_tunnel_sock_release. 
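Since this include/-only excerpt shows just the new netlink attribute, here is a rough sketch of what the in-kernel socket setup described above can look like when built on the udp_tunnel helpers; struct gtp_dev and gtp_encap_recv() are stand-ins for the driver's private data and encap receive handler, not the actual driver code:

/*
 * Sketch: creating a GTP UDP socket in the kernel when userspace passes
 * IFLA_GTP_CREATE_SOCKETS instead of IFLA_GTP_FD0/FD1. Names marked above
 * as stand-ins are assumptions; no encap_destroy is set because
 * udp_tunnel_sock_release() covers teardown.
 */
#include <net/udp_tunnel.h>
#include <net/gtp.h>
#include <linux/udp.h>
#include <linux/err.h>

static struct sock *gtp_create_sock(int type, struct gtp_dev *gtp,
				    struct net *net)
{
	struct udp_tunnel_sock_cfg tuncfg = {};
	struct udp_port_cfg udp_conf = {
		.local_ip.s_addr	= htonl(INADDR_ANY),
		.family			= AF_INET,
	};
	struct socket *sock;
	int err;

	udp_conf.local_udp_port = type == UDP_ENCAP_GTP0 ?
				  htons(GTP0_PORT) : htons(GTP1U_PORT);

	err = udp_sock_create(net, &udp_conf, &sock);
	if (err)
		return ERR_PTR(err);

	tuncfg.sk_user_data = gtp;
	tuncfg.encap_type = type;
	tuncfg.encap_rcv = gtp_encap_recv;

	setup_udp_tunnel_sock(net, sock, &tuncfg);

	return sock->sk;
}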
Note: GTP instance created with only this change applied, does not handle GTP Echo Requests. This is implemented in the following patch. Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ddca20357e7e..ebd2aa3ef809 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -887,6 +887,7 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From 9af41cc33471ea1efa6f77e188f055cc77d0a5c5 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:43 +0100 Subject: gtp: Implement GTP echo response Adding GTP device through ip link creates the situation where there is no userspace daemon which would handle GTP messages (Echo Request for example). GTP-U instance which would not respond to echo requests would violate GTP specification. When GTP packet arrives with GTP_ECHO_REQ message type, GTP_ECHO_RSP is send to the sender. GTP_ECHO_RSP message should contain information element with GTPIE_RECOVERY tag and restart counter value. For GTPv1 restart counter is not used and should be equal to 0, for GTPv0 restart counter contains information provided from userspace(IFLA_GTP_RESTART_COUNT). Signed-off-by: Wojciech Drewek Suggested-by: Harald Welte Reviewed-by: Harald Welte Tested-by: Harald Welte Signed-off-by: Tony Nguyen --- include/net/gtp.h | 31 +++++++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/net/gtp.h b/include/net/gtp.h index 0e16ebb2a82d..0e12c37f2958 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -7,8 +7,13 @@ #define GTP0_PORT 3386 #define GTP1U_PORT 2152 +/* GTP messages types */ +#define GTP_ECHO_REQ 1 /* Echo Request */ +#define GTP_ECHO_RSP 2 /* Echo Response */ #define GTP_TPDU 255 +#define GTPIE_RECOVERY 14 + struct gtp0_header { /* According to GSM TS 09.60. */ __u8 flags; __u8 type; @@ -27,6 +32,32 @@ struct gtp1_header { /* According to 3GPP TS 29.060. */ __be32 tid; } __attribute__ ((packed)); +struct gtp1_header_long { /* According to 3GPP TS 29.060. */ + __u8 flags; + __u8 type; + __be16 length; + __be32 tid; + __be16 seq; + __u8 npdu; + __u8 next; +} __packed; + +/* GTP Information Element */ +struct gtp_ie { + __u8 tag; + __u8 val; +} __packed; + +struct gtp0_packet { + struct gtp0_header gtp0_h; + struct gtp_ie ie; +} __packed; + +struct gtp1u_packet { + struct gtp1_header_long gtp1u_h; + struct gtp_ie ie; +} __packed; + #define GTP1_F_NPDU 0x01 #define GTP1_F_SEQ 0x02 #define GTP1_F_EXTHDR 0x04 diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ebd2aa3ef809..bd24c7dc10a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -888,6 +888,7 @@ enum { IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From d33bd757d362699cfce3c68b53cd12b947d196f4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:44 +0100 Subject: gtp: Implement GTP echo request Adding GTP device through ip link creates the situation where GTP instance is not able to send GTP echo requests. Echo requests are used to check if GTP peer is still alive. 
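For reference, the path-management header these echo messages use on the wire mirrors the GTPv1 structures added above; the helper below is an illustrative sketch only (it re-declares the layout to stay self-contained) and is not the driver's actual code:

/*
 * Sketch: filling a GTPv1-U Echo Request header. 0x30 | GTP1_F_SEQ = 0x32,
 * i.e. version 1, PT=1, S flag set; TEID is 0 for path management messages.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define GTP_ECHO_REQ	1
#define GTP1_F_SEQ	0x02

struct gtp1_hdr_long {		/* same layout as struct gtp1_header_long */
	uint8_t  flags;
	uint8_t  type;
	uint16_t length;
	uint32_t tid;
	uint16_t seq;
	uint8_t  npdu;
	uint8_t  next;
} __attribute__((packed));

static void build_gtp1u_echo_req(struct gtp1_hdr_long *hdr, uint16_t seq)
{
	memset(hdr, 0, sizeof(*hdr));
	hdr->flags  = 0x30 | GTP1_F_SEQ;
	hdr->type   = GTP_ECHO_REQ;
	hdr->length = htons(sizeof(*hdr) - 8);	/* bytes after the mandatory 8-byte header */
	hdr->tid    = 0;
	hdr->seq    = htons(seq);
}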
With this patch, gtp_genl_ops are extended by a new cmd (GTP_CMD_ECHOREQ) which allows sending an echo request in the given version of the GTP protocol (v0 or v1), from the given ms address to the given peer. TID is not included because in all path management messages it should be equal to 0. When a GTP echo response is detected, a multicast message is sent to everyone in the gtp_genl_family. The message contains the GTP version, ms address and peer address. Suggested-by: Harald Welte Signed-off-by: Wojciech Drewek Reviewed-by: Harald Welte Signed-off-by: Tony Nguyen --- include/uapi/linux/gtp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 79f9191bbb24..2f61298a7b77 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -8,6 +8,7 @@ enum gtp_genl_cmds { GTP_CMD_NEWPDP, GTP_CMD_DELPDP, GTP_CMD_GETPDP, + GTP_CMD_ECHOREQ, GTP_CMD_MAX, }; -- cgit v1.2.3 From e3acda7ade0a36c5cbebc2b54d30b7f08a4ba29b Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:45 +0100 Subject: net/sched: Allow flower to match on GTP options Options are as follows: PDU_TYPE:QFI, and they reference the fields from the PDU Session Protocol. PDU Session data is conveyed in GTP-U Extension Header. GTP-U Extension Header is described in 3GPP TS 29.281. PDU Session Protocol is described in 3GPP TS 38.415. PDU_TYPE - indicates the type of the PDU Session Information (4 bits) QFI - QoS Flow Identifier (6 bits) # ip link add gtp_dev type gtp role sgsn # tc qdisc add dev gtp_dev ingress # tc filter add dev gtp_dev protocol ip parent ffff: \ flower \ enc_key_id 11 \ gtp_opts 1:8/ff:ff \ action mirred egress redirect dev eth0 Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/net/gtp.h | 5 +++++ include/uapi/linux/if_tunnel.h | 4 +++- include/uapi/linux/pkt_cls.h | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gtp.h b/include/net/gtp.h index 0e16ebb2a82d..23c2aaae8a42 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -58,6 +58,11 @@ struct gtp1u_packet { struct gtp_ie ie; } __packed; +struct gtp_pdu_session_info { /* According to 3GPP TS 38.415. 
*/ + u8 pdu_type; + u8 qfi; +}; + #define GTP1_F_NPDU 0x01 #define GTP1_F_SEQ 0x02 #define GTP1_F_EXTHDR 0x04 diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7d9105533c7b..102119628ff5 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -176,8 +176,10 @@ enum { #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) +#define TUNNEL_GTP_OPT __cpu_to_be16(0x8000) #define TUNNEL_OPTIONS_PRESENT \ - (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ + TUNNEL_GTP_OPT) #endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee38b35c3f57..404f97fb239c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -616,6 +616,10 @@ enum { * TCA_FLOWER_KEY_ENC_OPT_ERSPAN_ * attributes */ + TCA_FLOWER_KEY_ENC_OPTS_GTP, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GTP_ + * attributes + */ __TCA_FLOWER_KEY_ENC_OPTS_MAX, }; @@ -654,6 +658,17 @@ enum { #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \ (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1) +enum { + TCA_FLOWER_KEY_ENC_OPT_GTP_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GTP_QFI, /* u8 */ + + __TCA_FLOWER_KEY_ENC_OPT_GTP_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GTP_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX - 1) + enum { TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, TCA_FLOWER_KEY_MPLS_OPTS_LSE, -- cgit v1.2.3 From 81dd9849fa4911f76a14f354a048865894b9751e Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:46 +0100 Subject: gtp: Add support for checking GTP device type Add a function that checks if a net device type is GTP. Signed-off-by: Wojciech Drewek Reviewed-by: Harald Welte Signed-off-by: Tony Nguyen --- include/net/gtp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/gtp.h b/include/net/gtp.h index 23c2aaae8a42..c1d6169df331 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -63,6 +63,12 @@ struct gtp_pdu_session_info { /* According to 3GPP TS 38.415. */ u8 qfi; }; +static inline bool netif_is_gtp(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gtp"); +} + #define GTP1_F_NPDU 0x01 #define GTP1_F_SEQ 0x02 #define GTP1_F_EXTHDR 0x04 -- cgit v1.2.3 From af6b9668e85ffd1502aada8036ccbf4dbd481708 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 3 Mar 2022 17:05:33 -0500 Subject: tracing: Move the defines to create TRACE_EVENTS into their own files In an effort to add custom event macros that can be used to create your own custom events based on existing tracepoints, move the defines of the special macros used in TRACE_EVENT() into their own files such that they can be reused for TRACE_CUSTOM_EVENT(). 
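For orientation, the macros being split out are the ones repeatedly re-expanded from TRACE_EVENT() definitions such as the following minimal sketch; the event name and fields are invented for illustration, and the usual TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE plumbing is omitted:

/*
 * Minimal illustrative trace header; the stage headers below re-expand each
 * of these TP_* sections into struct layout, assignment and print code.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sample

#if !defined(_TRACE_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SAMPLE_H

#include <linux/tracepoint.h>

TRACE_EVENT(sample_event,

	TP_PROTO(int id, const char *name),

	TP_ARGS(id, name),

	TP_STRUCT__entry(
		__field(int, id)
		__string(name, name)
	),

	TP_fast_assign(
		__entry->id = id;
		__assign_str(name, name);
	),

	TP_printk("id=%d name=%s", __entry->id, __get_str(name))
);

#endif /* _TRACE_SAMPLE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>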
Link: https://lkml.kernel.org/r/20220303220625.553406495@goodmis.org Cc: Ingo Molnar Cc: Andrew Morton Cc: Joel Fernandes Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/init.h | 37 +++ include/trace/stages/stage1_defines.h | 45 +++ include/trace/stages/stage2_defines.h | 48 ++++ include/trace/stages/stage3_defines.h | 129 +++++++++ include/trace/stages/stage4_defines.h | 57 ++++ include/trace/stages/stage5_defines.h | 83 ++++++ include/trace/stages/stage6_defines.h | 86 ++++++ include/trace/stages/stage7_defines.h | 34 +++ include/trace/trace_events.h | 499 +--------------------------------- 9 files changed, 527 insertions(+), 491 deletions(-) create mode 100644 include/trace/stages/init.h create mode 100644 include/trace/stages/stage1_defines.h create mode 100644 include/trace/stages/stage2_defines.h create mode 100644 include/trace/stages/stage3_defines.h create mode 100644 include/trace/stages/stage4_defines.h create mode 100644 include/trace/stages/stage5_defines.h create mode 100644 include/trace/stages/stage6_defines.h create mode 100644 include/trace/stages/stage7_defines.h (limited to 'include') diff --git a/include/trace/stages/init.h b/include/trace/stages/init.h new file mode 100644 index 000000000000..000bcfc8dd2e --- /dev/null +++ b/include/trace/stages/init.h @@ -0,0 +1,37 @@ + +#define __app__(x, y) str__##x##y +#define __app(x, y) __app__(x, y) + +#define TRACE_SYSTEM_STRING __app(TRACE_SYSTEM_VAR,__trace_system_name) + +#define TRACE_MAKE_SYSTEM_STR() \ + static const char TRACE_SYSTEM_STRING[] = \ + __stringify(TRACE_SYSTEM) + +TRACE_MAKE_SYSTEM_STR(); + +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) \ + static struct trace_eval_map __used __initdata \ + __##TRACE_SYSTEM##_##a = \ + { \ + .system = TRACE_SYSTEM_STRING, \ + .eval_string = #a, \ + .eval_value = a \ + }; \ + static struct trace_eval_map __used \ + __section("_ftrace_eval_map") \ + *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a + +#undef TRACE_DEFINE_SIZEOF +#define TRACE_DEFINE_SIZEOF(a) \ + static struct trace_eval_map __used __initdata \ + __##TRACE_SYSTEM##_##a = \ + { \ + .system = TRACE_SYSTEM_STRING, \ + .eval_string = "sizeof(" #a ")", \ + .eval_value = sizeof(a) \ + }; \ + static struct trace_eval_map __used \ + __section("_ftrace_eval_map") \ + *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a diff --git a/include/trace/stages/stage1_defines.h b/include/trace/stages/stage1_defines.h new file mode 100644 index 000000000000..8ab88c766d2b --- /dev/null +++ b/include/trace/stages/stage1_defines.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 1 definitions for creating trace events */ + +#undef __field +#define __field(type, item) type item; + +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + +#undef __field_struct +#define __field_struct(type, item) type item; + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) type item; + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) u32 __data_loc_##item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; + +#undef 
__rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args diff --git a/include/trace/stages/stage2_defines.h b/include/trace/stages/stage2_defines.h new file mode 100644 index 000000000000..9f2341df40da --- /dev/null +++ b/include/trace/stages/stage2_defines.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 2 definitions for creating trace events */ + +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) + +#undef TRACE_DEFINE_SIZEOF +#define TRACE_DEFINE_SIZEOF(a) + +#undef __field +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) u32 item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage3_defines.h b/include/trace/stages/stage3_defines.h new file mode 100644 index 000000000000..0bc131993b7a --- /dev/null +++ b/include/trace/stages/stage3_defines.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 3 definitions for creating trace events */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) 
fmt "\n", args + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_dynamic_array_len +#define __get_dynamic_array_len(field) \ + ((__entry->__data_loc_##field >> 16) & 0xffff) + +#undef __get_str +#define __get_str(field) ((char *)__get_dynamic_array(field)) + +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_bitmask +#define __get_bitmask(field) \ + ({ \ + void *__bitmask = __get_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) \ + ({ \ + void *__bitmask = __get_rel_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_rel_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags __flags[] = \ + { flag_array, { -1, NULL }}; \ + trace_print_flags_seq(p, delim, flag, __flags); \ + }) + +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + trace_print_symbols_seq(p, value, symbols); \ + }) + +#undef __print_flags_u64 +#undef __print_symbolic_u64 +#if BITS_PER_LONG == 32 +#define __print_flags_u64(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags_u64 __flags[] = \ + { flag_array, { -1, NULL } }; \ + trace_print_flags_seq_u64(p, delim, flag, __flags); \ + }) + +#define __print_symbolic_u64(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags_u64 symbols[] = \ + { symbol_array, { -1, NULL } }; \ + trace_print_symbols_seq_u64(p, value, symbols); \ + }) +#else +#define __print_flags_u64(flag, delim, flag_array...) \ + __print_flags(flag, delim, flag_array) + +#define __print_symbolic_u64(value, symbol_array...) 
\ + __print_symbolic(value, symbol_array) +#endif + +#undef __print_hex +#define __print_hex(buf, buf_len) \ + trace_print_hex_seq(p, buf, buf_len, false) + +#undef __print_hex_str +#define __print_hex_str(buf, buf_len) \ + trace_print_hex_seq(p, buf, buf_len, true) + +#undef __print_array +#define __print_array(array, count, el_size) \ + ({ \ + BUILD_BUG_ON(el_size != 1 && el_size != 2 && \ + el_size != 4 && el_size != 8); \ + trace_print_array_seq(p, array, count, el_size); \ + }) + +#undef __print_hex_dump +#define __print_hex_dump(prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii) \ + trace_print_hex_dump_seq(p, prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii) + +#undef __print_ns_to_secs +#define __print_ns_to_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + do_div(____val, NSEC_PER_SEC); \ + ____val; \ + }) + +#undef __print_ns_without_secs +#define __print_ns_without_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + (u32) do_div(____val, NSEC_PER_SEC); \ + }) diff --git a/include/trace/stages/stage4_defines.h b/include/trace/stages/stage4_defines.h new file mode 100644 index 000000000000..780a10fa5279 --- /dev/null +++ b/include/trace/stages/stage4_defines.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 4 definitions for creating trace events */ + +#undef __field_ext +#define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = _filter_type }, + +#undef __field_struct_ext +#define __field_struct_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + 0, .filter_type = _filter_type }, + +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + +#undef __field_struct +#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) + +#undef __array +#define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __dynamic_array +#define __dynamic_array(_type, _item, _len) { \ + .type = "__data_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(_type, _item, _len) { \ + .type = "__rel_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage5_defines.h b/include/trace/stages/stage5_defines.h new file mode 100644 index 000000000000..fb15394aae31 --- /dev/null +++ b/include/trace/stages/stage5_defines.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 5 definitions for 
creating trace events */ + +/* + * remember the offset of each array from the beginning of the event. + */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data) - \ + offsetof(typeof(*entry), __rel_loc_##item) - \ + sizeof(u32); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) +/* + * __bitmask_size_in_bytes_raw is the number of bytes needed to hold + * num_possible_cpus(). + */ +#define __bitmask_size_in_bytes_raw(nr_bits) \ + (((nr_bits) + 7) / 8) + +#define __bitmask_size_in_longs(nr_bits) \ + ((__bitmask_size_in_bytes_raw(nr_bits) + \ + ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8)) + +/* + * __bitmask_size_in_bytes is the number of bytes needed to hold + * num_possible_cpus() padded out to the nearest long. This is what + * is saved in the buffer, just to be consistent. + */ +#define __bitmask_size_in_bytes(nr_bits) \ + (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8)) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) diff --git a/include/trace/stages/stage6_defines.h b/include/trace/stages/stage6_defines.h new file mode 100644 index 000000000000..b3a1f26026be --- /dev/null +++ b/include/trace/stages/stage6_defines.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 6 definitions for creating trace events */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __assign_str +#define __assign_str(dst, src) \ + strcpy(__get_str(dst), (src) ? 
(const char *)(src) : "(null)"); + +#undef __assign_str_len +#define __assign_str_len(dst, src, len) \ + do { \ + memcpy(__get_str(dst), (src), (len)); \ + __get_str(dst)[len] = '\0'; \ + } while(0) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + +#undef __assign_bitmask +#define __assign_bitmask(dst, src, nr_bits) \ + memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __entry->__rel_loc_##item = __data_offsets.item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __assign_rel_str +#define __assign_rel_str(dst, src) \ + strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + +#undef __assign_rel_str_len +#define __assign_rel_str_len(dst, src, len) \ + do { \ + memcpy(__get_rel_str(dst), (src), (len)); \ + __get_rel_str(dst)[len] = '\0'; \ + } while (0) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_bitmask +#define __assign_rel_bitmask(dst, src, nr_bits) \ + memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef __perf_count +#define __perf_count(c) (c) + +#undef __perf_task +#define __perf_task(t) (t) diff --git a/include/trace/stages/stage7_defines.h b/include/trace/stages/stage7_defines.h new file mode 100644 index 000000000000..d65445328f18 --- /dev/null +++ b/include/trace/stages/stage7_defines.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 7 definitions for creating trace events */ + +#undef __entry +#define __entry REC + +#undef __print_flags +#undef __print_symbolic +#undef __print_hex +#undef __print_hex_str +#undef __get_dynamic_array +#undef __get_dynamic_array_len +#undef __get_str +#undef __get_bitmask +#undef __get_rel_dynamic_array +#undef __get_rel_dynamic_array_len +#undef __get_rel_str +#undef __get_rel_bitmask +#undef __print_array +#undef __print_hex_dump + +/* + * The below is not executed in the kernel. It is only what is + * displayed in the print format for userspace to parse. + */ +#undef __print_ns_to_secs +#define __print_ns_to_secs(val) (val) / 1000000000UL + +#undef __print_ns_without_secs +#define __print_ns_without_secs(val) (val) % 1000000000UL + +#undef TP_printk +#define TP_printk(fmt, args...) 
"\"" fmt "\", " __stringify(args) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 3d29919045af..8a8cd66cc6d5 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -24,42 +24,7 @@ #define TRACE_SYSTEM_VAR TRACE_SYSTEM #endif -#define __app__(x, y) str__##x##y -#define __app(x, y) __app__(x, y) - -#define TRACE_SYSTEM_STRING __app(TRACE_SYSTEM_VAR,__trace_system_name) - -#define TRACE_MAKE_SYSTEM_STR() \ - static const char TRACE_SYSTEM_STRING[] = \ - __stringify(TRACE_SYSTEM) - -TRACE_MAKE_SYSTEM_STR(); - -#undef TRACE_DEFINE_ENUM -#define TRACE_DEFINE_ENUM(a) \ - static struct trace_eval_map __used __initdata \ - __##TRACE_SYSTEM##_##a = \ - { \ - .system = TRACE_SYSTEM_STRING, \ - .eval_string = #a, \ - .eval_value = a \ - }; \ - static struct trace_eval_map __used \ - __section("_ftrace_eval_map") \ - *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a - -#undef TRACE_DEFINE_SIZEOF -#define TRACE_DEFINE_SIZEOF(a) \ - static struct trace_eval_map __used __initdata \ - __##TRACE_SYSTEM##_##a = \ - { \ - .system = TRACE_SYSTEM_STRING, \ - .eval_string = "sizeof(" #a ")", \ - .eval_value = sizeof(a) \ - }; \ - static struct trace_eval_map __used \ - __section("_ftrace_eval_map") \ - *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a +#include "stages/init.h" /* * DECLARE_EVENT_CLASS can be used to add a generic function @@ -80,48 +45,7 @@ TRACE_MAKE_SYSTEM_STR(); PARAMS(print)); \ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); - -#undef __field -#define __field(type, item) type item; - -#undef __field_ext -#define __field_ext(type, item, filter_type) type item; - -#undef __field_struct -#define __field_struct(type, item) type item; - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) type item; - -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) u32 __data_loc_##item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args +#include "stages/stage1_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ @@ -185,50 +109,7 @@ TRACE_MAKE_SYSTEM_STR(); * The size of an array is also encoded, in the higher 16 bits of . 
*/ -#undef TRACE_DEFINE_ENUM -#define TRACE_DEFINE_ENUM(a) - -#undef TRACE_DEFINE_SIZEOF -#define TRACE_DEFINE_SIZEOF(a) - -#undef __field -#define __field(type, item) - -#undef __field_ext -#define __field_ext(type, item, filter_type) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) u32 item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#include "stages/stage2_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -300,131 +181,7 @@ TRACE_MAKE_SYSTEM_STR(); * in binary. */ -#undef __entry -#define __entry field - -#undef TP_printk -#define TP_printk(fmt, args...) fmt "\n", args - -#undef __get_dynamic_array -#define __get_dynamic_array(field) \ - ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) - -#undef __get_dynamic_array_len -#define __get_dynamic_array_len(field) \ - ((__entry->__data_loc_##field >> 16) & 0xffff) - -#undef __get_str -#define __get_str(field) ((char *)__get_dynamic_array(field)) - -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)__entry + \ - offsetof(typeof(*__entry), __rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_bitmask -#define __get_bitmask(field) \ - ({ \ - void *__bitmask = __get_dynamic_array(field); \ - unsigned int __bitmask_size; \ - __bitmask_size = __get_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) \ - ({ \ - void *__bitmask = __get_rel_dynamic_array(field); \ - unsigned int __bitmask_size; \ - __bitmask_size = __get_rel_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - -#undef __print_flags -#define __print_flags(flag, delim, flag_array...) \ - ({ \ - static const struct trace_print_flags __flags[] = \ - { flag_array, { -1, NULL }}; \ - trace_print_flags_seq(p, delim, flag, __flags); \ - }) - -#undef __print_symbolic -#define __print_symbolic(value, symbol_array...) \ - ({ \ - static const struct trace_print_flags symbols[] = \ - { symbol_array, { -1, NULL }}; \ - trace_print_symbols_seq(p, value, symbols); \ - }) - -#undef __print_flags_u64 -#undef __print_symbolic_u64 -#if BITS_PER_LONG == 32 -#define __print_flags_u64(flag, delim, flag_array...) 
\ - ({ \ - static const struct trace_print_flags_u64 __flags[] = \ - { flag_array, { -1, NULL } }; \ - trace_print_flags_seq_u64(p, delim, flag, __flags); \ - }) - -#define __print_symbolic_u64(value, symbol_array...) \ - ({ \ - static const struct trace_print_flags_u64 symbols[] = \ - { symbol_array, { -1, NULL } }; \ - trace_print_symbols_seq_u64(p, value, symbols); \ - }) -#else -#define __print_flags_u64(flag, delim, flag_array...) \ - __print_flags(flag, delim, flag_array) - -#define __print_symbolic_u64(value, symbol_array...) \ - __print_symbolic(value, symbol_array) -#endif - -#undef __print_hex -#define __print_hex(buf, buf_len) \ - trace_print_hex_seq(p, buf, buf_len, false) - -#undef __print_hex_str -#define __print_hex_str(buf, buf_len) \ - trace_print_hex_seq(p, buf, buf_len, true) - -#undef __print_array -#define __print_array(array, count, el_size) \ - ({ \ - BUILD_BUG_ON(el_size != 1 && el_size != 2 && \ - el_size != 4 && el_size != 8); \ - trace_print_array_seq(p, array, count, el_size); \ - }) - -#undef __print_hex_dump -#define __print_hex_dump(prefix_str, prefix_type, \ - rowsize, groupsize, buf, len, ascii) \ - trace_print_hex_dump_seq(p, prefix_str, prefix_type, \ - rowsize, groupsize, buf, len, ascii) - -#undef __print_ns_to_secs -#define __print_ns_to_secs(value) \ - ({ \ - u64 ____val = (u64)(value); \ - do_div(____val, NSEC_PER_SEC); \ - ____val; \ - }) - -#undef __print_ns_without_secs -#define __print_ns_without_secs(value) \ - ({ \ - u64 ____val = (u64)(value); \ - (u32) do_div(____val, NSEC_PER_SEC); \ - }) +#include "stages/stage3_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -479,59 +236,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __field_ext -#define __field_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ - .is_signed = is_signed_type(_type), .filter_type = _filter_type }, - -#undef __field_struct_ext -#define __field_struct_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ - 0, .filter_type = _filter_type }, - -#undef __field -#define __field(type, item) __field_ext(type, item, FILTER_OTHER) - -#undef __field_struct -#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) - -#undef __array -#define __array(_type, _item, _len) { \ - .type = #_type"["__stringify(_len)"]", .name = #_item, \ - .size = sizeof(_type[_len]), .align = __alignof__(_type), \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __dynamic_array -#define __dynamic_array(_type, _item, _len) { \ - .type = "__data_loc " #_type "[]", .name = #_item, \ - .size = 4, .align = 4, \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(_type, _item, _len) { \ - .type = "__rel_loc " #_type "[]", .name = #_item, \ - .size = 4, .align = 4, \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef 
__rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#include "stages/stage4_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ @@ -544,85 +249,7 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * remember the offset of each array from the beginning of the event. - */ - -#undef __entry -#define __entry entry - -#undef __field -#define __field(type, item) - -#undef __field_ext -#define __field_ext(type, item, filter_type) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - __item_length = (len) * sizeof(type); \ - __data_offsets->item = __data_size + \ - offsetof(typeof(*entry), __data); \ - __data_offsets->item |= __item_length << 16; \ - __data_size += __item_length; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __item_length = (len) * sizeof(type); \ - __data_offsets->item = __data_size + \ - offsetof(typeof(*entry), __data) - \ - offsetof(typeof(*entry), __rel_loc_##item) - \ - sizeof(u32); \ - __data_offsets->item |= __item_length << 16; \ - __data_size += __item_length; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) -/* - * __bitmask_size_in_bytes_raw is the number of bytes needed to hold - * num_possible_cpus(). - */ -#define __bitmask_size_in_bytes_raw(nr_bits) \ - (((nr_bits) + 7) / 8) - -#define __bitmask_size_in_longs(nr_bits) \ - ((__bitmask_size_in_bytes_raw(nr_bits) + \ - ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8)) - -/* - * __bitmask_size_in_bytes is the number of bytes needed to hold - * num_possible_cpus() padded out to the nearest long. This is what - * is saved in the buffer, just to be consistent. 
- */ -#define __bitmask_size_in_bytes(nr_bits) \ - (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8)) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) +#include "stages/stage5_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -745,88 +372,7 @@ static inline notrace int trace_event_get_offsets_##call( \ #define _TRACE_PERF_INIT(call) #endif /* CONFIG_PERF_EVENTS */ -#undef __entry -#define __entry entry - -#undef __field -#define __field(type, item) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - __entry->__data_loc_##item = __data_offsets.item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __assign_str -#define __assign_str(dst, src) \ - strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); - -#undef __assign_str_len -#define __assign_str_len(dst, src, len) \ - do { \ - memcpy(__get_str(dst), (src), (len)); \ - __get_str(dst)[len] = '\0'; \ - } while(0) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -#undef __get_bitmask -#define __get_bitmask(field) (char *)__get_dynamic_array(field) - -#undef __assign_bitmask -#define __assign_bitmask(dst, src, nr_bits) \ - memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __entry->__rel_loc_##item = __data_offsets.item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __assign_rel_str -#define __assign_rel_str(dst, src) \ - strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); - -#undef __assign_rel_str_len -#define __assign_rel_str_len(dst, src, len) \ - do { \ - memcpy(__get_rel_str(dst), (src), (len)); \ - __get_rel_str(dst)[len] = '\0'; \ - } while (0) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - -#undef __assign_rel_bitmask -#define __assign_rel_bitmask(dst, src, nr_bits) \ - memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - -#undef TP_fast_assign -#define TP_fast_assign(args...) 
args - -#undef __perf_count -#define __perf_count(c) (c) - -#undef __perf_task -#define __perf_task(t) (t) +#include "stages/stage6_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -872,36 +418,7 @@ static inline void ftrace_test_probe_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __entry -#define __entry REC - -#undef __print_flags -#undef __print_symbolic -#undef __print_hex -#undef __print_hex_str -#undef __get_dynamic_array -#undef __get_dynamic_array_len -#undef __get_str -#undef __get_bitmask -#undef __get_rel_dynamic_array -#undef __get_rel_dynamic_array_len -#undef __get_rel_str -#undef __get_rel_bitmask -#undef __print_array -#undef __print_hex_dump - -/* - * The below is not executed in the kernel. It is only what is - * displayed in the print format for userspace to parse. - */ -#undef __print_ns_to_secs -#define __print_ns_to_secs(val) (val) / 1000000000UL - -#undef __print_ns_without_secs -#define __print_ns_without_secs(val) (val) % 1000000000UL - -#undef TP_printk -#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) +#include "stages/stage7_defines.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ -- cgit v1.2.3 From 3a73333fb370f7b65de9d94c53df503642bda789 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 3 Mar 2022 17:05:34 -0500 Subject: tracing: Add TRACE_CUSTOM_EVENT() macro To make it really easy to add custom events from modules, add a TRACE_CUSTOM_EVENT() macro that acts just like the TRACE_EVENT() macro, but creates a custom event to an already existing tracepoint. The trace_custom_sched.[ch] has been updated to use this new macro to show how simple it is. Link: https://lkml.kernel.org/r/20220303220625.738622494@goodmis.org Cc: Ingo Molnar Cc: Andrew Morton Cc: Joel Fernandes Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 24 +++- include/trace/define_custom_trace.h | 77 +++++++++++++ include/trace/trace_custom_events.h | 221 ++++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 include/trace/define_custom_trace.h create mode 100644 include/trace/trace_custom_events.h (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..9b09fd633d48 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -315,6 +315,7 @@ enum { TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, TRACE_EVENT_FL_EPROBE_BIT, + TRACE_EVENT_FL_CUSTOM_BIT, }; /* @@ -328,6 +329,9 @@ enum { * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe * EPROBE - Event is an event probe + * CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint) + * This is set when the custom event has not been attached + * to a tracepoint yet, then it is cleared when it is. 
*/ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -339,6 +343,7 @@ enum { TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), + TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) @@ -440,7 +445,9 @@ static inline bool bpf_prog_array_valid(struct trace_event_call *call) static inline const char * trace_event_name(struct trace_event_call *call) { - if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + if (call->flags & TRACE_EVENT_FL_CUSTOM) + return call->name; + else if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; else return call->name; @@ -901,3 +908,18 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif #endif /* _LINUX_TRACE_EVENT_H */ + +/* + * Note: we keep the TRACE_CUSTOM_EVENT outside the include file ifdef protection. + * This is due to the way trace custom events work. If a file includes two + * trace event headers under one "CREATE_CUSTOM_TRACE_EVENTS" the first include + * will override the TRACE_CUSTOM_EVENT and break the second include. + */ + +#ifndef TRACE_CUSTOM_EVENT + +#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) +#define TRACE_CUSTOM_EVENT(name, proto, args, struct, assign, print) + +#endif /* ifdef TRACE_CUSTOM_EVENT (see note above) */ diff --git a/include/trace/define_custom_trace.h b/include/trace/define_custom_trace.h new file mode 100644 index 000000000000..5827a4c92c74 --- /dev/null +++ b/include/trace/define_custom_trace.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Trace files that want to automate creation of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. 
+ */ + +#ifdef CREATE_CUSTOM_TRACE_EVENTS + +/* Prevent recursion */ +#undef CREATE_CUSTOM_TRACE_EVENTS + +#include + +#undef TRACE_CUSTOM_EVENT +#define TRACE_CUSTOM_EVENT(name, proto, args, tstruct, assign, print) + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_PATH +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_CUSTOM_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#ifdef TRACEPOINTS_ENABLED +#include +#endif + +#undef TRACE_CUSTOM_EVENT +#undef DECLARE_CUSTOM_EVENT_CLASS +#undef DEFINE_CUSTOM_EVENT +#undef TRACE_CUSTOM_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_PATH +#endif + +/* We may be processing more files */ +#define CREATE_CUSTOM_TRACE_POINTS + +#endif /* CREATE_CUSTOM_TRACE_POINTS */ diff --git a/include/trace/trace_custom_events.h b/include/trace/trace_custom_events.h new file mode 100644 index 000000000000..b567c7202339 --- /dev/null +++ b/include/trace/trace_custom_events.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is similar to the trace_events.h file, but is to only + * create custom trace events to be attached to existing tracepoints. + * Where as the TRACE_EVENT() macro (from trace_events.h) will create + * both the trace event and the tracepoint it will attach the event to, + * TRACE_CUSTOM_EVENT() is to create only a custom version of an existing + * trace event (created by TRACE_EVENT() or DEFINE_EVENT()), and will + * be placed in the "custom" system. 
+ */ + +#include + +/* All custom events are placed in the custom group */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM custom + +#ifndef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR TRACE_SYSTEM +#endif + +/* The init stage creates the system string and enum mappings */ + +#include "stages/init.h" + +#undef TRACE_CUSTOM_EVENT +#define TRACE_CUSTOM_EVENT(name, proto, args, tstruct, assign, print) \ + DECLARE_CUSTOM_EVENT_CLASS(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_CUSTOM_EVENT(name, name, PARAMS(proto), PARAMS(args)); + +/* Stage 1 creates the structure of the recorded event layout */ + +#include "stages/stage1_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ + struct trace_custom_event_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[]; \ + }; \ + \ + static struct trace_event_class custom_event_class_##name; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) \ + static struct trace_event_call __used \ + __attribute__((__aligned__(4))) custom_event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 2 creates the custom class */ + +#include "stages/stage2_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ + struct trace_custom_event_data_offsets_##call { \ + tstruct; \ + }; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 3 create the way to print the custom event */ + +#include "stages/stage3_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static notrace enum print_line_t \ +trace_custom_raw_output_##call(struct trace_iterator *iter, int flags, \ + struct trace_event *trace_event) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct trace_seq __maybe_unused *p = &iter->tmp_seq; \ + struct trace_custom_event_raw_##call *field; \ + int ret; \ + \ + field = (typeof(field))iter->ent; \ + \ + ret = trace_raw_output_prep(iter, trace_event); \ + if (ret != TRACE_TYPE_HANDLED) \ + return ret; \ + \ + trace_event_printf(iter, print); \ + \ + return trace_handle_return(s); \ +} \ +static struct trace_event_functions trace_custom_event_type_funcs_##call = { \ + .trace = trace_custom_raw_output_##call, \ +}; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 4 creates the offset layout for the fields */ + +#include "stages/stage4_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \ +static struct trace_event_fields trace_custom_event_fields_##call[] = { \ + tstruct \ + {} }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 5 creates the helper function for dynamic fields */ + +#include "stages/stage5_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static inline notrace int trace_custom_event_get_offsets_##call( \ + struct trace_custom_event_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + int __maybe_unused __item_length; \ + struct trace_custom_event_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 6 creates the probe function that 
records the event */ + +#include "stages/stage6_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ + \ +static notrace void \ +trace_custom_event_raw_event_##call(void *__data, proto) \ +{ \ + struct trace_event_file *trace_file = __data; \ + struct trace_custom_event_data_offsets_##call __maybe_unused __data_offsets; \ + struct trace_event_buffer fbuffer; \ + struct trace_custom_event_raw_##call *entry; \ + int __data_size; \ + \ + if (trace_trigger_soft_disabled(trace_file)) \ + return; \ + \ + __data_size = trace_custom_event_get_offsets_##call(&__data_offsets, args); \ + \ + entry = trace_event_buffer_reserve(&fbuffer, trace_file, \ + sizeof(*entry) + __data_size); \ + \ + if (!entry) \ + return; \ + \ + tstruct \ + \ + { assign; } \ + \ + trace_event_buffer_commit(&fbuffer); \ +} +/* + * The ftrace_test_custom_probe is compiled out, it is only here as a build time check + * to make sure that if the tracepoint handling changes, the ftrace probe will + * fail to compile unless it too is updated. + */ + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, call, proto, args) \ +static inline void ftrace_test_custom_probe_##call(void) \ +{ \ + check_trace_callback_type_##call(trace_custom_event_raw_event_##template); \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 7 creates the actual class and event structure for the custom event */ + +#include "stages/stage7_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static char custom_print_fmt_##call[] = print; \ +static struct trace_event_class __used __refdata custom_event_class_##call = { \ + .system = TRACE_SYSTEM_STRING, \ + .fields_array = trace_custom_event_fields_##call, \ + .fields = LIST_HEAD_INIT(custom_event_class_##call.fields),\ + .raw_init = trace_event_raw_init, \ + .probe = trace_custom_event_raw_event_##call, \ + .reg = trace_event_reg, \ +}; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, call, proto, args) \ + \ +static struct trace_event_call __used custom_event_##call = { \ + .name = #call, \ + .class = &custom_event_class_##template, \ + .event.funcs = &trace_custom_event_type_funcs_##template, \ + .print_fmt = custom_print_fmt_##template, \ + .flags = TRACE_EVENT_FL_CUSTOM, \ +}; \ +static inline int trace_custom_event_##call##_update(struct tracepoint *tp) \ +{ \ + if (tp->name && strcmp(tp->name, #call) == 0) { \ + custom_event_##call.tp = tp; \ + custom_event_##call.flags = TRACE_EVENT_FL_TRACEPOINT; \ + return 1; \ + } \ + return 0; \ +} \ +static struct trace_event_call __used \ +__section("_ftrace_events") *__custom_event_##call = &custom_event_##call + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3 From 380af29b8d7670c445965bd573ab219aff0c4c11 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 10 Mar 2022 21:37:09 -0500 Subject: tracing: Add snapshot at end of kernel boot up Add ftrace_boot_snapshot kernel parameter that will take a snapshot at the end of boot up just before switching over to user space (it happens during the kernel freeing of init memory). This is useful when there's interesting data that can be collected from kernel start up, but gets overridden by user space start up code. With this option, the ring buffer content from the boot up traces gets saved in the snapshot at the end of boot up. 
This trace can be read from: /sys/kernel/tracing/snapshot Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9999e29187de..37b619185ec9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,6 +30,12 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_TRACING +extern void ftrace_boot_snapshot(void); +#else +static inline void ftrace_boot_snapshot(void) { } +#endif + #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops; struct ftrace_regs; @@ -215,7 +221,10 @@ struct ftrace_ops_hash { void ftrace_free_init_mem(void); void ftrace_free_mem(struct module *mod, void *start, void *end); #else -static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_init_mem(void) +{ + ftrace_boot_snapshot(); +} static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif -- cgit v1.2.3 From b65700d046a60d0a29f6289e13c62d0c93689f11 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 8 Mar 2022 18:25:15 +0100 Subject: remoteproc: move rproc_da_to_va declaration to remoteproc.h The rproc_da_to_va() API is an exported function, so move its declaration from the remoteproc local remoteproc_internal.h to the public remoteproc.h file. This will allow drivers outside of the remoteproc folder to be able to use this API. Signed-off-by: Suman Anna Signed-off-by: Dave Gerlach [adjusted line numbers to apply] Signed-off-by: Drew Fustini Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220308172515.29556-1-dfustini@baylibre.com --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 93a1d0050fbc..b2ee325e0af1 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -675,6 +675,7 @@ void rproc_shutdown(struct rproc *rproc); int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); +void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, -- cgit v1.2.3 From c993ee0f9f81caf5767a50d1faeba39a0dc82af2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2022 13:23:31 +0000 Subject: watch_queue: Fix filter limit check In watch_queue_set_filter(), there are a couple of places where we check that the filter type value does not exceed what the type_filter bitmap can hold. One place calculates the number of bits by: if (tf[i].type >= sizeof(wfilter->type_filter) * 8) which is fine, but the second does: if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) which is not. This can lead to a couple of out-of-bounds writes due to a too-large type: (1) __set_bit() on wfilter->type_filter (2) Writing more elements in wfilter->filters[] than we allocated. Fix this by just using the proper WATCH_TYPE__NR instead, which is the number of types we actually know about. The bug may cause an oops looking something like: BUG: KASAN: slab-out-of-bounds in watch_queue_set_filter+0x659/0x740 Write of size 4 at addr ffff88800d2c66bc by task watch_queue_oob/611 ... 
Call Trace: dump_stack_lvl+0x45/0x59 print_address_description.constprop.0+0x1f/0x150 ... kasan_report.cold+0x7f/0x11b ... watch_queue_set_filter+0x659/0x740 ... __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Allocated by task 611: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 watch_queue_set_filter+0x23a/0x740 __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88800d2c66a0 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 28 bytes inside of 32-byte region [ffff88800d2c66a0, ffff88800d2c66c0) Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: Jann Horn Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/watch_queue.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdba..3b9a40ae8bdb 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ struct watch_type_filter filters[]; -- cgit v1.2.3 From 413a4a6b0b5553f2423d210f65e98c211b99c3f8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2022 16:02:18 +0000 Subject: cachefiles: Fix volume coherency attribute A network filesystem may set coherency data on a volume cookie, and if given, cachefiles will store this in an xattr on the directory in the cache corresponding to the volume. The function that sets the xattr just stores the contents of the volume coherency buffer directly into the xattr, with nothing added; the checking function, on the other hand, has a cut'n'paste error whereby it tries to interpret the xattr contents as would be the xattr on an ordinary file (using the cachefiles_xattr struct). This results in a failure to match the coherency data because the buffer ends up being shifted by 18 bytes. Fix this by defining a structure specifically for the volume xattr and making both the setting and checking functions use it. Since the volume coherency doesn't work if used, take the opportunity to insert a reserved field for future use, set it to 0 and check that it is 0. Log mismatch through the appropriate tracepoint. Note that this only affects cifs; 9p, afs, ceph and nfs don't use the volume coherency data at the moment. 
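The dedicated structure itself lives in the cachefiles code proper, so it does not appear in this include-only excerpt; as a rough sketch of the shape the message describes (field names and types are illustrative, not quoted from the patch):

struct cachefiles_vol_xattr {
	__be32	reserved;	/* must be written as 0 and checked for 0 */
	__u8	data[];		/* volume coherency data supplied by the netfs */
};

Both the setting and checking paths use this one layout, and a non-zero reserved field is reported through the new cachefiles_coherency_vol_check_resv value added to the tracepoint enum in the hunk below.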
Fixes: 32e150037dce ("fscache, cachefiles: Store the volume coherency data") Reported-by: Rohith Surabattula Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: Steve French cc: linux-cifs@vger.kernel.org cc: linux-cachefs@redhat.com Signed-off-by: Linus Torvalds --- include/trace/events/cachefiles.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index c6f5aa74db89..2c530637e10a 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -56,6 +56,7 @@ enum cachefiles_coherency_trace { cachefiles_coherency_set_ok, cachefiles_coherency_vol_check_cmp, cachefiles_coherency_vol_check_ok, + cachefiles_coherency_vol_check_resv, cachefiles_coherency_vol_check_xattr, cachefiles_coherency_vol_set_fail, cachefiles_coherency_vol_set_ok, @@ -139,6 +140,7 @@ enum cachefiles_error_trace { EM(cachefiles_coherency_set_ok, "SET ok ") \ EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \ EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \ + EM(cachefiles_coherency_vol_check_resv, "VOL BAD resv") \ EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \ EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \ E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ") -- cgit v1.2.3 From c13b780c4597e1e6cee3154053a196aa329b1367 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Sun, 13 Feb 2022 14:12:42 -0600 Subject: remoteproc: Change rproc_shutdown() to return a status The rproc_shutdown() function is currently not returning any error code, and any failures within rproc_stop() are not passed back to the users. Change the signature to return a success value back to the callers. The remoteproc sysfs and cdev interfaces are also updated to return back this status to userspace. Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220213201246.25952-2-s-anna@ti.com --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index b2ee325e0af1..7c943f0a2fc4 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -671,7 +671,7 @@ rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, u32 da, const char *name, ...); int rproc_boot(struct rproc *rproc); -void rproc_shutdown(struct rproc *rproc); +int rproc_shutdown(struct rproc *rproc); int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); -- cgit v1.2.3 From 84bd3690bf54a2f2f3b8449afa022aac1957ba17 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2022 19:49:37 -0800 Subject: nvdimm/namespace: Delete nd_namespace_blk Now that none of the configuration paths consider BLK namespaces, delete the BLK namespace data and supporting code. 
Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/164688417727.2879318.11691110761800109662.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/nd.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index 4813c7089e5c..7b2ccbdc1cbc 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -136,27 +136,6 @@ struct nd_namespace_pmem { int id; }; -/** - * struct nd_namespace_blk - namespace for dimm-bounded persistent memory - * @alt_name: namespace name supplied in the dimm label - * @uuid: namespace name supplied in the dimm label - * @id: ida allocated id - * @lbasize: blk namespaces have a native sector size when btt not present - * @size: sum of all the resource ranges allocated to this namespace - * @num_resources: number of dpa extents to claim - * @res: discontiguous dpa extents for given dimm - */ -struct nd_namespace_blk { - struct nd_namespace_common common; - char *alt_name; - uuid_t *uuid; - int id; - unsigned long lbasize; - resource_size_t size; - int num_resources; - struct resource **res; -}; - static inline struct nd_namespace_io *to_nd_namespace_io(const struct device *dev) { return container_of(dev, struct nd_namespace_io, common.dev); @@ -169,11 +148,6 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(const struct device return container_of(nsio, struct nd_namespace_pmem, nsio); } -static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *dev) -{ - return container_of(dev, struct nd_namespace_blk, common.dev); -} - /** * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace * @ndns: device to read -- cgit v1.2.3 From 3b6c6c039707f6bb7c64af2aa82a437fabb93aee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2022 19:49:48 -0800 Subject: nvdimm/region: Delete nd_blk_region infrastructure Now that the nd_namespace_blk infrastructure is removed, delete all the region machinery to coordinate provisioning aliased capacity between PMEM and BLK. Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/164688418803.2879318.1302315202397235855.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 24 ------------------------ include/uapi/linux/ndctl.h | 2 -- 2 files changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 7074aa9af525..0d61e07b6827 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -25,8 +25,6 @@ struct badrange { }; enum { - /* when a dimm supports both PMEM and BLK access a label is required */ - NDD_ALIASING = 0, /* unarmed memory devices may not persist writes */ NDD_UNARMED = 1, /* locked memory devices should not be accessed */ @@ -35,8 +33,6 @@ enum { NDD_SECURITY_OVERWRITE = 3, /* tracking whether or not there is a pending device reference */ NDD_WORK_PENDING = 4, - /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. 
no aliasing */ - NDD_NOBLK = 5, /* dimm supports namespace labels */ NDD_LABELING = 6, @@ -140,21 +136,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, } struct nvdimm_bus; -struct module; -struct nd_blk_region; -struct nd_blk_region_desc { - int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); - int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, - void *iobuf, u64 len, int rw); - struct nd_region_desc ndr_desc; -}; - -static inline struct nd_blk_region_desc *to_blk_region_desc( - struct nd_region_desc *ndr_desc) -{ - return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc); - -} /* * Note that separate bits for locked + unlocked are defined so that @@ -257,7 +238,6 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); struct device *nd_region_dev(struct nd_region *nd_region); -struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); @@ -295,10 +275,6 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); void *nd_region_provider_data(struct nd_region *nd_region); -void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); -void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); -struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); -unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 8cf1e4884fd5..17e02b64ea2e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -189,7 +189,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ -#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ #define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { @@ -198,7 +197,6 @@ enum nd_driver_flags { ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, - ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -- cgit v1.2.3 From c97448437847bd76116b3a077e44808e946bb1ae Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Feb 2022 15:35:29 +0100 Subject: clk: Add clk_drop_range In order to reset the range on a clock, we need to call clk_set_rate_range with a minimum of 0 and a maximum of ULONG_MAX. Since it's fairly inconvenient, let's introduce a clk_drop_range() function that will do just this. 
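A minimal, hypothetical consumer sketch (not part of the patch) of how the new helper pairs with clk_set_rate_range(); the function name, clock handle and rate values are assumed for illustration only:

#include <linux/clk.h>

/* Hypothetical consumer: constrain the clock only while it matters. */
static int example_pin_then_release(struct clk *clk)
{
	int ret;

	/* Pin the rate into a window for the duration of the operation... */
	ret = clk_set_rate_range(clk, 100000000, 200000000);
	if (ret)
		return ret;

	/* ...rate-sensitive work happens here... */

	/* ...then drop the constraint instead of open-coding 0..ULONG_MAX. */
	return clk_drop_range(clk);
}

As the hunk below shows, clk_drop_range() is simply a static inline wrapper around clk_set_rate_range(clk, 0, ULONG_MAX).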
Suggested-by: Stephen Boyd Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220225143534.405820-8-maxime@cerno.tech Signed-off-by: Stephen Boyd --- include/linux/clk.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 266e8de3cb51..39faa54efe88 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -986,6 +986,17 @@ static inline void clk_bulk_disable_unprepare(int num_clks, clk_bulk_unprepare(num_clks, clks); } +/** + * clk_drop_range - Reset any range set on that clock + * @clk: clock source + * + * Returns success (0) or negative errno. + */ +static inline int clk_drop_range(struct clk *clk) +{ + return clk_set_rate_range(clk, 0, ULONG_MAX); +} + /** * clk_get_optional - lookup and obtain a reference to an optional clock * producer. -- cgit v1.2.3 From d3826a95222c44a527f76b011bb5af8c924632e9 Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Fri, 11 Mar 2022 11:43:06 +0100 Subject: nfp: add support for NFP3800/NFP3803 PCIe devices Enable binding the nfp driver to NFP3800 and NFP3803 devices. The PCIE_SRAM offset is different for the NFP3800 device, which also only supports a single explicit group. Changes to Dirk's work: * 48-bit dma addressing is not ready yet. Keep 40-bit dma addressing for NFP3800. Signed-off-by: Dirk van der Merwe Signed-off-by: Jakub Kicinski Signed-off-by: Fei Qin Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c7e6f2043c7d..5462c29f0538 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2531,9 +2531,11 @@ #define PCI_VENDOR_ID_HUAWEI 0x19e5 #define PCI_VENDOR_ID_NETRONOME 0x19ee +#define PCI_DEVICE_ID_NETRONOME_NFP3800 0x3800 #define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000 #define PCI_DEVICE_ID_NETRONOME_NFP5000 0x5000 #define PCI_DEVICE_ID_NETRONOME_NFP6000 0x6000 +#define PCI_DEVICE_ID_NETRONOME_NFP3800_VF 0x3803 #define PCI_DEVICE_ID_NETRONOME_NFP6000_VF 0x6003 #define PCI_VENDOR_ID_QMI 0x1a32 -- cgit v1.2.3 From 8e6ed963763fe21429eabfc76c69ce2b0163a3dd Mon Sep 17 00:00:00 2001 From: Jiyong Park Date: Fri, 11 Mar 2022 11:00:16 +0900 Subject: vsock: each transport cycles only on its own sockets When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Signed-off-by: Jiyong Park Link: https://lore.kernel.org/r/20220311020017.1509316-1-jiyong@google.com Signed-off-by: Jakub Kicinski --- include/net/af_vsock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); -- cgit v1.2.3 From 625788b5844511cf4c30cffa7fa0bc3a69cebc82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Mar 2022 21:14:20 -0800 Subject: net: add per-cpu storage and net->core_stats Before adding yet another possibly contended atomic_long_t, it is time to add per-cpu storage for existing ones: dev->tx_dropped, dev->rx_dropped, and dev->rx_nohandler Because many devices do not have to increment such counters, allocate the per-cpu storage on demand, so that dev_get_stats() does not have to spend considerable time folding zero counters. Note that some drivers have abused these counters which were supposed to be only used by core networking stack. v4: should use per_cpu_ptr() in dev_get_stats() (Jakub) v3: added a READ_ONCE() in netdev_core_stats_alloc() (Paolo) v2: add a missing include (reported by kernel test robot ) Change in netdev_core_stats_alloc() (Jakub) Signed-off-by: Eric Dumazet Cc: jeffreyji Reviewed-by: Brian Vazquez Reviewed-by: Jakub Kicinski Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20220311051420.2608812-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 46 +++++++++++++++++++++++++++++++++++++--------- include/net/bonding.h | 2 +- 2 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index acd3cf69b61f..0d994710b335 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -194,6 +195,14 @@ struct net_device_stats { unsigned long tx_compressed; }; +/* per-cpu stats, allocated on demand. + * Try to fit them in a single cache line, for dev_get_stats() sake. 
+ */ +struct net_device_core_stats { + local_t rx_dropped; + local_t tx_dropped; + local_t rx_nohandler; +} __aligned(4 * sizeof(local_t)); #include #include @@ -1735,12 +1744,8 @@ enum netdev_ml_priv_type { * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * - * @rx_dropped: Dropped packets by core network, - * do not use this in drivers - * @tx_dropped: Dropped packets by core network, + * @core_stats: core networking counters, * do not use this in drivers - * @rx_nohandler: nohandler dropped packets by core network on - * inactive devices, do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * @@ -2023,9 +2028,7 @@ struct net_device { struct net_device_stats stats; /* not used by modern drivers */ - atomic_long_t rx_dropped; - atomic_long_t tx_dropped; - atomic_long_t rx_nohandler; + struct net_device_core_stats __percpu *core_stats; /* Stats to monitor link on/off, flapping */ atomic_t carrier_up_count; @@ -3839,13 +3842,38 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } +struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev); + +static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + + if (likely(p)) + return this_cpu_ptr(p); + + return netdev_core_stats_alloc(dev); +} + +#define DEV_CORE_STATS_INC(FIELD) \ +static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ +{ \ + struct net_device_core_stats *p = dev_core_stats(dev); \ + \ + if (p) \ + local_inc(&p->FIELD); \ +} +DEV_CORE_STATS_INC(rx_dropped) +DEV_CORE_STATS_INC(tx_dropped) +DEV_CORE_STATS_INC(rx_nohandler) + static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { - atomic_long_inc(&dev->rx_dropped); + dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); return NET_RX_DROP; } diff --git a/include/net/bonding.h b/include/net/bonding.h index d0dfe727e0b1..b14f4c0b4e9e 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -770,7 +770,7 @@ extern const struct sysfs_ops slave_sysfs_ops; static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } -- cgit v1.2.3 From f2aa197e4794bf4c2c0c9570684f86e6fa103e8b Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sat, 5 Mar 2022 11:41:03 +0800 Subject: cgroup: Fix suspicious rcu_dereference_check() usage warning task_css_set_check() will use rcu_dereference_check() to check for rcu_read_lock_held() on the read-side, which is not true after commit dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock"). This commit drop explicit rcu_read_lock(), change to RCU-sched read-side critical section. So fix the RCU warning by adding check for rcu_read_lock_sched_held(). 
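A short, hypothetical sketch (not taken from the patch) of the read-side pattern that the extended check now accepts: an RCU-sched critical section rather than an explicit rcu_read_lock(). The helper name is illustrative:

#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

static void example_peek_css_set(struct task_struct *task)
{
	struct css_set *cset;

	/* Preemption disabled marks an RCU-sched read-side critical section. */
	rcu_read_lock_sched();
	cset = task_css_set(task);	/* task_css_set_check() no longer warns here */
	(void)cset;			/* a real caller would inspect it within the section */
	rcu_read_unlock_sched();
}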
Fixes: dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock") Reported-by: Linux Kernel Functional Testing Reported-by: syzbot+16e3f2c77e7c5a0113f9@syzkaller.appspotmail.com Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Tested-by: Zhouyi Zhou Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220305034103.57123-1-zhouchengming@bytedance.com --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1e356c222756..0d1ada8968d7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ + rcu_read_lock_sched_held() || \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) -- cgit v1.2.3 From 6f98a4bfee72c22f50aedb39fb761567969865fe Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Feb 2022 17:19:24 +0100 Subject: random: block in /dev/urandom This topic has come up countless times, and usually doesn't go anywhere. This time I thought I'd bring it up with a slightly narrower focus, updated for some developments over the last three years: we finally can make /dev/urandom always secure, in light of the fact that our RNG is now always seeded. Ever since Linus' 50ee7529ec45 ("random: try to actively add entropy rather than passively wait for it"), the RNG does a haveged-style jitter dance around the scheduler, in order to produce entropy (and credit it) for the case when we're stuck in wait_for_random_bytes(). How ever you feel about the Linus Jitter Dance is beside the point: it's been there for three years and usually gets the RNG initialized in a second or so. As a matter of fact, this is what happens currently when people use getrandom(). It's already there and working, and most people have been using it for years without realizing. So, given that the kernel has grown this mechanism for seeding itself from nothing, and that this procedure happens pretty fast, maybe there's no point any longer in having /dev/urandom give insecure bytes. In the past we didn't want the boot process to deadlock, which was understandable. But now, in the worst case, a second goes by, and the problem is resolved. It seems like maybe we're finally at a point when we can get rid of the infamous "urandom read hole". The one slight drawback is that the Linus Jitter Dance relies on random_ get_entropy() being implemented. The first lines of try_to_generate_ entropy() are: stack.now = random_get_entropy(); if (stack.now == random_get_entropy()) return; On most platforms, random_get_entropy() is simply aliased to get_cycles(). The number of machines without a cycle counter or some other implementation of random_get_entropy() in 2022, which can also run a mainline kernel, and at the same time have a both broken and out of date userspace that relies on /dev/urandom never blocking at boot is thought to be exceedingly low. And to be clear: those museum pieces without cycle counters will continue to run Linux just fine, and even /dev/urandom will be operable just like before; the RNG just needs to be seeded first through the usual means, which should already be the case now. 
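As a minimal userspace sketch of the practical effect (illustrative only, not part of the patch), the common interfaces now behave identically; each of the calls below returns bytes only once the CRNG has been initialized:

        #include <fcntl.h>
        #include <sys/random.h>
        #include <unistd.h>

        unsigned char buf[32];

        getrandom(buf, sizeof(buf), 0);          /* blocks until the RNG is seeded */

        int fd = open("/dev/urandom", O_RDONLY);
        read(fd, buf, sizeof(buf));              /* after this change, waits for the same seeding */
        close(fd);

The one deliberate exception is getrandom(GRND_INSECURE), discussed next.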
On systems that really do want unseeded randomness, we already offer getrandom(GRND_INSECURE), which is in use by, e.g., systemd for seeding their hash tables at boot. Nothing in this commit would affect GRND_INSECURE, and it remains the means of getting those types of random numbers. This patch goes a long way toward eliminating a long overdue userspace crypto footgun. After several decades of endless user confusion, we will finally be able to say, "use any single one of our random interfaces and you'll be fine. They're all the same. It doesn't matter." And that, I think, is really something. Finally all of those blog posts and disagreeing forums and contradictory articles will all become correct about whatever they happened to recommend, and along with it, a whole class of vulnerabilities eliminated. With very minimal downside, we're finally in a position where we can make this change. Cc: Dinh Nguyen Cc: Nick Hu Cc: Max Filippov Cc: Palmer Dabbelt Cc: David S. Miller Cc: Yoshinori Sato Cc: Michal Simek Cc: Borislav Petkov Cc: Guo Ren Cc: Geert Uytterhoeven Cc: Joshua Kinard Cc: David Laight Cc: Dominik Brodowski Cc: Eric Biggers Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Kees Cook Cc: Lennart Poettering Cc: Konstantin Ryabitsev Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 6148b8d1ccf3..725a4d08c0a0 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -44,7 +44,7 @@ extern void del_random_ready_callback(struct random_ready_callback *rdy); extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE -extern const struct file_operations random_fops, urandom_fops; +extern const struct file_operations random_fops; #endif u32 get_random_u32(void); -- cgit v1.2.3 From ae099e8e98fb01395228628be5a4661e3bd86fe4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 23 Feb 2022 13:43:44 +0100 Subject: random: add mechanism for VM forks to reinitialize crng When a VM forks, we must immediately mix in additional information to the stream of random output so that two forks or a rollback don't produce the same stream of random numbers, which could have catastrophic cryptographic consequences. This commit adds a simple API, add_vmfork_ randomness(), for that, by force reseeding the crng. This has the added benefit of also draining the entropy pool and setting its timer back, so that any old entropy that was there prior -- which could have already been used by a different fork, or generally gone stale -- does not contribute to the accounting of the next 256 bits. Cc: Dominik Brodowski Cc: Theodore Ts'o Cc: Jann Horn Cc: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Jason A. 
Donenfeld --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 725a4d08c0a0..117468f3a92e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq) __latent_entropy; extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); +extern void add_vmfork_randomness(const void *unique_vm_id, size_t size); extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -- cgit v1.2.3 From d273845ecb0e0626842782a4497f0c5876139ec3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Feb 2022 16:55:52 +0100 Subject: ACPI: allow longer device IDs We create a list of ACPI "PNP" IDs which contains _HID, _CID, and CLS entries of the respective devices. However, when making structs for matching, we squeeze those IDs into acpi_device_id, which only has 9 bytes space to store the identifier. The subsystem actually captures the full length of the IDs, and the modalias has the full length, but this struct we use for matching is limited. It originally had 16 bytes, but was changed to only have 9 in 6543becf26ff ("mod/file2alias: make modalias generation safe for cross compiling"), presumably on the theory that it would match the ACPI spec so it didn't matter. Unfortunately, while most people adhere to the ACPI specs, Microsoft decided that its VM Generation Counter device [1] should only be identifiable by _CID with a value of "VM_Gen_Counter", which is longer than 9 characters. To allow device drivers to match identifiers that exceed the 9 byte limit, this simply ups the length to 16, just like it was before the aforementioned commit. Empirical testing indicates that this doesn't actually increase vmlinux size on 64-bit, because the ulong in the same struct caused there to be 7 bytes of padding anyway, and when doing a s/M/Y/g i386_defconfig build, the bzImage only increased by 0.0055%, so negligible. This patch is a prerequisite to add support for VMGenID in Linux, the subsequent patch in this series. It has been confirmed to also work on the udev/modalias side in userspace. [1] https://download.microsoft.com/download/3/1/C/31CFC307-98CA-4CA5-914C-D9772691E214/VirtualMachineGenerationID.docx Signed-off-by: Alexander Graf Co-developed-by: Jason A. Donenfeld [Jason: reworked commit message a bit, went with len=16 approach.] Cc: Mika Westerberg Cc: Andy Shevchenko Cc: Len Brown Cc: Greg Kroah-Hartman Reviewed-by: Ard Biesheuvel Acked-by: Hans de Goede Acked-by: Rafael J. Wysocki Signed-off-by: Jason A. Donenfeld --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4bb71979a8fd..5da5d990ff58 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -211,7 +211,7 @@ struct css_device_id { kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 9 +#define ACPI_ID_LEN 16 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; -- cgit v1.2.3 From a4107d34f960df99ca07fa8eb022425a804f59f3 Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Tue, 1 Mar 2022 15:14:04 +0100 Subject: random: do not export add_vmfork_randomness() unless needed Since add_vmfork_randomness() is only called from vmgenid.o, we can guard it in CONFIG_VMGENID, similarly to how we do with add_disk_randomness() and CONFIG_BLOCK. If we ever have multiple things calling into add_vmfork_randomness(), we can add another shared Kconfig symbol for that, but for now, this is good enough. Even though add_vmfork_randomess() is a pretty small function, removing it means that there are only calls to crng_reseed(false) and none to crng_reseed(true), which means the compiler can constant propagate the false, removing branches from crng_reseed() and its descendants. Additionally, we don't even need the symbol to be exported if CONFIG_VMGENID is not a module, so conditionalize that too. Cc: Dominik Brodowski Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 117468f3a92e..f209f1a78899 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,7 +34,9 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq) __latent_entropy; extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); +#if IS_ENABLED(CONFIG_VMGENID) extern void add_vmfork_randomness(const void *unique_vm_id, size_t size); +#endif extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -- cgit v1.2.3 From 5acd35487dc911541672b3ffc322851769c32a56 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 1 Mar 2022 20:03:49 +0100 Subject: random: replace custom notifier chain with standard one We previously rolled our own randomness readiness notifier, which only has two users in the whole kernel. Replace this with a more standard atomic notifier block that serves the same purpose with less code. Also unexport the symbols, because no modules use it, only unconditional builtins. The only drawback is that it's possible for a notification handler returning the "stop" code to prevent further processing, but given that there are only two users, and that we're unexporting this anyway, that doesn't seem like a significant drawback for the simplification we receive here. Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- include/linux/random.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index f209f1a78899..fab1ab0563b4 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -10,11 +10,7 @@ #include -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; extern void add_device_randomness(const void *, size_t); extern void add_bootloader_randomness(const void *, size_t); @@ -42,8 +38,8 @@ extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); +extern int register_random_ready_notifier(struct notifier_block *nb); +extern int unregister_random_ready_notifier(struct notifier_block *nb); extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE -- cgit v1.2.3 From f3c2682bad7bc6033c837e9c66e5af881fe8d465 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 1 Mar 2022 20:22:39 +0100 Subject: random: provide notifier for VM fork Drivers such as WireGuard need to learn when VMs fork in order to clear sessions. This commit provides a simple notifier_block for that, with a register and unregister function. When no VM fork detection is compiled in, this turns into a no-op, similar to how the power notifier works. Cc: Dominik Brodowski Cc: Theodore Ts'o Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index fab1ab0563b4..c0baffe7afb1 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -32,6 +32,11 @@ extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #if IS_ENABLED(CONFIG_VMGENID) extern void add_vmfork_randomness(const void *unique_vm_id, size_t size); +extern int register_random_vmfork_notifier(struct notifier_block *nb); +extern int unregister_random_vmfork_notifier(struct notifier_block *nb); +#else +static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; } +static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } #endif extern void get_random_bytes(void *buf, size_t nbytes); -- cgit v1.2.3 From df227dc8a68d81b7fe3416eca16d1f21d0b537f0 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 9 Feb 2022 22:16:31 -0600 Subject: mailbox: ti-msgmgr: Operate mailbox in polled mode during system suspend During the system suspend path we must set all queues to operate in polled mode as it is possible for any protocol built using this mailbox, such as TISCI, to require communication during the no irq phase of suspend, and we cannot rely on interrupts there. Polled mode is implemented by allowing the mailbox user to define an RX channel as part of the message that is sent which is what gets polled for a response. If polled mode is enabled, this will immediately be polled for a response at the end of the mailbox send_data op before returning success for the data send or timing out if no response is received. 
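As a rough sketch of the client-side usage just described (illustrative only; rx_chan, tx_chan, buf and len are assumed to be set up by the mailbox client, and the two new fields are the ones added to struct ti_msgmgr_message in the hunk further below):

        struct ti_msgmgr_message msg = {
                .len           = len,
                .buf           = buf,
                .chan_rx       = rx_chan,  /* channel on which the response is expected */
                .timeout_rx_ms = 100,      /* how long send_data may poll for it */
        };

        mbox_send_message(tx_chan, &msg);  /* in polled mode this returns only once the
                                            * response arrives or the timeout expires */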
Finally, to ensure polled mode is always enabled during system suspend, iterate through all queues to set RX queues to polled mode during system suspend and disable polled mode for all in the resume handler. Signed-off-by: Dave Gerlach Signed-off-by: Jassi Brar --- include/linux/soc/ti/ti-msgmgr.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h index 1f6e76d423cf..69a8d7682c4b 100644 --- a/include/linux/soc/ti/ti-msgmgr.h +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -1,7 +1,7 @@ /* * Texas Instruments' Message Manager * - * Copyright (C) 2015-2016 Texas Instruments Incorporated - https://www.ti.com/ + * Copyright (C) 2015-2022 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon * * This program is free software; you can redistribute it and/or modify @@ -17,10 +17,14 @@ #ifndef TI_MSGMGR_H #define TI_MSGMGR_H +struct mbox_chan; + /** * struct ti_msgmgr_message - Message Manager structure * @len: Length of data in the Buffer * @buf: Buffer pointer + * @chan_rx: Expected channel for response, must be provided to use polled rx + * @timeout_rx_ms: Timeout value to use if polling for response * * This is the structure for data used in mbox_send_message * the length of data buffer used depends on the SoC integration @@ -30,6 +34,8 @@ struct ti_msgmgr_message { size_t len; u8 *buf; + struct mbox_chan *chan_rx; + int timeout_rx_ms; }; #endif /* TI_MSGMGR_H */ -- cgit v1.2.3 From 7af1974af0a9ba8a8ed2e3e947d87dd4d9a78d27 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:47 +0530 Subject: ext4: fix ext4_fc_stats trace point ftrace's __print_symbolic() requires that any enum values used in the symbol to string translation table be wrapped in a TRACE_DEFINE_ENUM so that the enum value can be decoded from the ftrace ring buffer by user space tooling. This patch also fixes few other problems found in this trace point. e.g. dereferencing structures in TP_printk which should not be done at any cost. Also to avoid checkpatch warnings, this patch removes those whitespaces/tab stops issues. 
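For readers unfamiliar with the requirement being fixed, a minimal illustration (MY_REASON_FOO and show_my_reason() are made-up names, not from the patch): every enum value used in a __print_symbolic() table must also be exported with TRACE_DEFINE_ENUM(), otherwise user-space tooling cannot translate the raw value stored in the ring buffer.

        TRACE_DEFINE_ENUM(MY_REASON_FOO);          /* export the numeric value to user space */

        #define show_my_reason(reason)                    \
                __print_symbolic(reason,                  \
                        { MY_REASON_FOO, "FOO" })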
Cc: stable@kernel.org Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Reported-by: Steven Rostedt Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Reviewed-by: Steven Rostedt (Google) Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/b4b9691414c35c62e570b723e661c80674169f9a.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 78 ++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19e957b7f941..1a0b7030f72a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) +TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); + #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ @@ -2723,41 +2734,50 @@ TRACE_EVENT(ext4_fc_commit_stop, #define FC_REASON_NAME_STAT(reason) \ show_fc_reason(reason), \ - __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] + __entry->fc_ineligible_rc[reason] TRACE_EVENT(ext4_fc_stats, - TP_PROTO(struct super_block *sb), - - TP_ARGS(sb), + TP_PROTO(struct super_block *sb), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct ext4_sb_info *, sbi) - __field(int, count) - ), + TP_ARGS(sb), - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->sbi = EXT4_SB(sb); - ), + TP_STRUCT__entry( + __field(dev_t, dev) + __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) + __field(unsigned long, fc_commits) + __field(unsigned long, fc_ineligible_commits) + __field(unsigned long, fc_numblks) + ), - TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " - "num_commits:%ld, ineligible: %ld, numblks: %ld", - MAJOR(__entry->dev), MINOR(__entry->dev), - FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), - FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), - FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), - __entry->sbi->s_fc_stats.fc_num_commits, - __entry->sbi->s_fc_stats.fc_ineligible_commits, - __entry->sbi->s_fc_stats.fc_numblks) + TP_fast_assign( + int i; + __entry->dev = sb->s_dev; + for (i = 0; i < EXT4_FC_REASON_MAX; i++) { + __entry->fc_ineligible_rc[i] = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; + } + __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; + __entry->fc_ineligible_commits = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; + __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; + ), + + TP_printk("dev %d,%d fc ineligible reasons:\n" + "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " + "num_commits:%lu, ineligible: %lu, numblks: 
%lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), + FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), + __entry->fc_commits, __entry->fc_ineligible_commits, + __entry->fc_numblks) ); #define DEFINE_TRACE_DENTRY_EVENT(__type) \ -- cgit v1.2.3 From 8cb5a30372ef5cf2b1d258fce1711d80f834740a Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:48 +0530 Subject: ext4: convert ext4_fc_track_dentry type events to use event class One should use DECLARE_EVENT_CLASS for similar event types instead of defining TRACE_EVENT for each event type. This is helpful in reducing the text section footprint for e.g. [1] [1]: https://lwn.net/Articles/381064/ Signed-off-by: Ritesh Harjani Reviewed-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/a019cb46219ef4b30e4d98d7ced7d8819a2fc61d.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 56 +++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 1a0b7030f72a..3d48fcb62987 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2780,33 +2780,39 @@ TRACE_EVENT(ext4_fc_stats, __entry->fc_numblks) ); -#define DEFINE_TRACE_DENTRY_EVENT(__type) \ - TRACE_EVENT(ext4_fc_track_##__type, \ - TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ - \ - TP_ARGS(inode, dentry, ret), \ - \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(int, ino) \ - __field(int, error) \ - ), \ - \ - TP_fast_assign( \ - __entry->dev = inode->i_sb->s_dev; \ - __entry->ino = inode->i_ino; \ - __entry->error = ret; \ - ), \ - \ - TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, __entry->error, \ - #__type) \ +DECLARE_EVENT_CLASS(ext4_fc_track_dentry, + + TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), + + TP_ARGS(inode, dentry, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, i_ino) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->error = ret; + ), + + TP_printk("dev %d,%d, ino %lu, error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->i_ino, __entry->error ) +); + +#define DEFINE_EVENT_CLASS_DENTRY(__type) \ +DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type, \ + TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ + TP_ARGS(inode, dentry, ret) \ +) -DEFINE_TRACE_DENTRY_EVENT(create); -DEFINE_TRACE_DENTRY_EVENT(link); -DEFINE_TRACE_DENTRY_EVENT(unlink); +DEFINE_EVENT_CLASS_DENTRY(create); +DEFINE_EVENT_CLASS_DENTRY(link); +DEFINE_EVENT_CLASS_DENTRY(unlink); TRACE_EVENT(ext4_fc_track_inode, TP_PROTO(struct inode *inode, int ret), -- cgit v1.2.3 From 8109517b394e6deab5fd21cc5460e82ffed229c6 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 24 Jan 2022 11:25:24 +0100 Subject: rpmsg: ctrl: Introduce new RPMSG_CREATE/RELEASE_DEV_IOCTL controls Allow the user space application to create and release an rpmsg device by 
adding RPMSG_CREATE_DEV_IOCTL and RPMSG_RELEASE_DEV_IOCTL ioctrls to the /dev/rpmsg_ctrl interface The RPMSG_CREATE_DEV_IOCTL Ioctl can be used to instantiate a local rpmsg device. Depending on the back-end implementation, the associated rpmsg driver is probed and a NS announcement can be sent to the remote processor. The RPMSG_RELEASE_DEV_IOCTL allows the user application to release a rpmsg device created either by the remote processor or with the RPMSG_CREATE_DEV_IOCTL call. Depending on the back-end implementation, the associated rpmsg driver is removed and a NS destroy rpmsg can be sent to the remote processor. Suggested-by: Mathieu Poirier Signed-off-by: Arnaud Pouliquen Reviewed-by: Mathieu Poirier Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220124102524.295783-12-arnaud.pouliquen@foss.st.com --- include/uapi/linux/rpmsg.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h index f5ca8740f3fb..1637e68177d9 100644 --- a/include/uapi/linux/rpmsg.h +++ b/include/uapi/linux/rpmsg.h @@ -33,4 +33,14 @@ struct rpmsg_endpoint_info { */ #define RPMSG_DESTROY_EPT_IOCTL _IO(0xb5, 0x2) +/** + * Instantiate a new local rpmsg service device. + */ +#define RPMSG_CREATE_DEV_IOCTL _IOW(0xb5, 0x3, struct rpmsg_endpoint_info) + +/** + * Release a local rpmsg device. + */ +#define RPMSG_RELEASE_DEV_IOCTL _IOW(0xb5, 0x4, struct rpmsg_endpoint_info) + #endif -- cgit v1.2.3 From b5fdf66f6eb2560784c6f60131dc567de06267dc Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 1 Mar 2022 14:37:27 -0500 Subject: NFS: Remove remaining dfprintks related to fscache and remove NFSDBG_FSCACHE The fscache cookie APIs including fscache_acquire_cookie() and fscache_relinquish_cookie() now have very good tracing. Thus, there is no real need for dfprintks in the NFS fscache interface. The NFS fscache interface has removed all dfprintks so remove the NFSDBG_FSCACHE defines. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 3afe3767c55d..ae0de165c014 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -52,7 +52,7 @@ #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 -#define NFSDBG_FSCACHE 0x0800 +#define NFSDBG_FSCACHE 0x0800 /* unused */ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 -- cgit v1.2.3 From a41b05edfedb939440e83666f23de3ef9af33acf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC/auth: async tasks mustn't block waiting for memory When memory is short, new worker threads cannot be created and we depend on the minimum one rpciod thread to be able to handle everything. So it must not block waiting for memory. mempools are particularly a problem as memory can only be released back to the mempool by an async rpc task running. If all available workqueue threads are waiting on the mempool, no thread is available to return anything. lookup_cred() can block on a mempool or kmalloc - and this can cause deadlocks. So add a new RPCAUTH_LOOKUP flag for async lookups and don't block on memory. If the -ENOMEM gets back to call_refreshresult(), wait a short while and try again. HZ>>4 is chosen as it is used elsewhere for -ENOMEM retries. 
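A heavily simplified sketch of that retry, roughly as it would sit in the status handling of call_refreshresult() (illustrative, not the exact patch):

        case -ENOMEM:
                /* async cred lookup could not get memory: wait a little and retry,
                 * HZ >> 4 matching the other -ENOMEM retries in the RPC client */
                rpc_delay(task, HZ >> 4);
                task->tk_action = call_refresh;
                return;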
Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 98da816b5fc2..3e6ce288a7fc 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -99,6 +99,7 @@ struct rpc_auth_create_args { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_ASYNC 0x02 /* Don't block waiting for memory */ /* * Client authentication ops -- cgit v1.2.3 From 89c2be8a951654758dffeaaa6272328d9c8f29be Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: NFS: discard NFS_RPC_SWAPFLAGS and RPC_TASK_ROOTCREDS NFS_RPC_SWAPFLAGS is only used for READ requests. It sets RPC_TASK_SWAPPER which gives some memory-allocation priority to requests. This is not needed for swap READ - though it is for writes where it is set via a different mechanism. RPC_TASK_ROOTCREDS causes the 'machine' credential to be used. This is not needed as the root credential is saved when the swap file is opened, and this is used for all IO. So NFS_RPC_SWAPFLAGS isn't needed, and as it is the only user of RPC_TASK_ROOTCREDS, that isn't needed either. Remove both. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 ----- include/linux/sunrpc/sched.h | 1 - include/trace/events/sunrpc.h | 1 - 3 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3893386ceaed..9074ed0b65aa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -45,11 +45,6 @@ */ #define NFS_MAX_TRANSPORTS 16 -/* - * These are the default flags for swap requests - */ -#define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) - /* * Size of the NFS directory verifier */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index db964bb63912..56710f8056d3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -124,7 +124,6 @@ struct rpc_task_setup { #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ -#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 29982d60b68a..ac33892da411 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -311,7 +311,6 @@ TRACE_EVENT(rpc_request, { RPC_TASK_MOVEABLE, "MOVEABLE" }, \ { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ - { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ { RPC_TASK_SOFT, "SOFT" }, \ -- cgit v1.2.3 From 4dc73c679114a2f408567e2e44770ed934190db2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: NFSv4: keep state manager thread active if swap is enabled If we are swapping over NFSv4, we may not be able to allocate memory to start the state-manager thread at the time when we need it. So keep it always running when swap is enabled, and just signal it to start. 
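The "following all ->cl_parent links" step can be pictured with a short sketch (illustrative; clp is assumed to be the nfs_client, and the root rpc_clnt is its own parent):

        struct rpc_clnt *clnt = clp->cl_rpcclient;

        while (clnt != clnt->cl_parent)
                clnt = clnt->cl_parent;

        /* cl_swapper is updated and tested on this root clnt */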
This requires updating and testing the cl_swapper count on the root rpc_clnt after following all ->cl_parent links. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 82f7c2730b9a..49ba486aea5f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1797,6 +1797,8 @@ struct nfs_rpc_ops { struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); + void (*enable_swap)(struct inode *inode); + void (*disable_swap)(struct inode *inode); }; /* -- cgit v1.2.3 From 64158668ac8b31626a8ce48db4cad08496eb8340 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: NFS: swap IO handling is slightly different for O_DIRECT IO 1/ Taking the i_rwsem for swap IO triggers lockdep warnings regarding possible deadlocks with "fs_reclaim". These deadlocks could, I believe, eventuate if a buffered read on the swapfile was attempted. We don't need coherence with the page cache for a swap file, and buffered writes are forbidden anyway. There is no other need for i_rwsem during direct IO. So never take it for swap_rw() 2/ generic_write_checks() explicitly forbids writes to swap, and performs checks that are not needed for swap. So bypass it for swap_rw(). Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9074ed0b65aa..c47c448befc8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -508,10 +508,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) * linux/fs/nfs/direct.c */ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter); +ssize_t nfs_file_direct_read(struct kiocb *iocb, + struct iov_iter *iter, bool swap); +ssize_t nfs_file_direct_write(struct kiocb *iocb, + struct iov_iter *iter, bool swap); /* * linux/fs/nfs/dir.c -- cgit v1.2.3 From 1f4a5983d623d6dbda4cc7587a2d9d798e0d4035 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Mar 2022 17:04:03 +0800 Subject: net: macvlan: add net device refcount tracker Add net device refcount tracker to macvlan. Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 10c94a3936ca..b42294739063 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -21,6 +21,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; + netdevice_tracker dev_tracker; void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; -- cgit v1.2.3 From fbd9a2ceba5c74bbfa19cf257ae4b4b2c820860d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 11 Mar 2022 16:03:42 +0100 Subject: net: Add lockdep asserts to ____napi_schedule(). ____napi_schedule() needs to be invoked with disabled interrupts due to __raise_softirq_irqoff (in order not to corrupt the per-CPU list). 
____napi_schedule() needs also to be invoked from an interrupt context so that the raised-softirq is processed while the interrupt context is left. Add lockdep asserts for both conditions. While this is the second time the irq/softirq check is needed, provide a generic lockdep_assert_softirq_will_run() which is used by both caller. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/linux/lockdep.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..0cc65d216701 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -329,6 +329,12 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_assert_none_held_once() \ lockdep_assert_once(!current->lockdep_depth) +/* + * Ensure that softirq is handled within the callchain and not delayed and + * handled by chance. + */ +#define lockdep_assert_softirq_will_run() \ + lockdep_assert_once(hardirq_count() | softirq_count()) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) @@ -414,6 +420,7 @@ extern int lockdep_is_held(const void *); #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) #define lockdep_assert_none_held_once() do { } while (0) +#define lockdep_assert_softirq_will_run() do { } while (0) #define lockdep_recursing(tsk) (0) -- cgit v1.2.3 From d538eca85c2aa6ecb5036e6ff47efc93d9f294da Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Mar 2022 23:15:18 +0200 Subject: net: dsa: report and change port default priority using dcbnl The port-based default QoS class is assigned to packets that lack a VLAN PCP (or the port is configured to not trust the VLAN PCP), an IP DSCP (or the port is configured to not trust IP DSCP), and packets on which no tc-skbedit action has matched. Similar to other drivers, this can be exposed to user space using the DCB Application Priority Table. IEEE 802.1Q-2018 specifies in Table D-8 - Sel field values that when the Selector is 1, the Protocol ID value of 0 denotes the "Default application priority. For use when application priority is not otherwise specified." The way in which the dcbnl integration in DSA has been designed has to do with its requirements. Andrew Lunn explains that SOHO switches are expected to come with some sort of pre-configured QoS profile, and that it is desirable for this to come pre-loaded into the DSA slave interfaces' DCB application priority table. In the dcbnl design, this is possible because calls to dcb_ieee_setapp() can be initiated by anyone including being self-initiated by this device driver. However, what makes this challenging to implement in DSA is that the DSA core manages the net_devices (effectively hiding them from drivers), while drivers manage the hardware. The DSA core has no knowledge of what individual drivers' QoS policies are. DSA could export to drivers a wrapper over dcb_ieee_setapp() and these could call that function to pre-populate the app priority table, however drivers don't have a good moment in time to do this. The dsa_switch_ops :: setup() method gets called before the net_devices are created (dsa_slave_create), and so is dsa_switch_ops :: port_setup(). What remains is dsa_switch_ops :: port_enable(), but this gets called upon each ndo_open. If we add app table entries on every open, we'd need to remove them on close, to avoid duplicate entry errors. 
But if we delete app priority entries on close, what we delete may not be the initial, driver pre-populated entries, but rather user-added entries. So it is clear that letting drivers choose the timing of the dcb_ieee_setapp() call is inappropriate. The alternative which was chosen is to introduce hardware-specific ops in dsa_switch_ops, and effectively hide dcbnl details from drivers as well. For pre-populating the application table, dsa_slave_dcbnl_init() will call ds->ops->port_get_default_prio() which is supposed to read from hardware. If the operation succeeds, DSA creates a default-prio app table entry. The method is called as soon as the slave_dev is registered, but before we release the rtnl_mutex. This is done such that user space sees the app table entries as soon as it sees the interface being registered. The fact that we populate slave_dev->dcbnl_ops with a non-NULL pointer changes behavior in dcb_doit() from net/dcb/dcbnl.c, which used to return -EOPNOTSUPP for any dcbnl operation where netdev->dcbnl_ops is NULL. Because there are still dcbnl-unaware DSA drivers even if they have dcbnl_ops populated, the way to restore the behavior is to make all dcbnl_ops return -EOPNOTSUPP on absence of the hardware-specific dsa_switch_ops method. The dcbnl framework absurdly allows there to be more than one app table entry for the same selector and protocol (in other words, more than one port-based default priority). In the iproute2 dcb program, there is a "replace" syntactical sugar command which performs an "add" and a "del" to hide this away. But we choose the largest configured priority when we call ds->ops->port_set_default_prio(), using __fls(). When there is no default-prio app table entry left, the port-default priority is restored to 0. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210113154139.1803705-2-olteanv@gmail.com/ Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d16505fc0e2..1220af73151b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -892,6 +892,13 @@ struct dsa_switch_ops { int (*get_ts_info)(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts); + /* + * DCB ops + */ + int (*port_get_default_prio)(struct dsa_switch *ds, int port); + int (*port_set_default_prio)(struct dsa_switch *ds, int port, + u8 prio); + /* * Suspend and resume */ -- cgit v1.2.3 From 47d75f7822064d024ec9207c0fc1777f983783b7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Mar 2022 23:15:19 +0200 Subject: net: dsa: report and change port dscp priority using dcbnl Similar to the port-based default priority, IEEE 802.1Q-2018 allows the Application Priority Table to define QoS classes (0 to 7) per IP DSCP value (0 to 63). In the absence of an app table entry for a packet with DSCP value X, QoS classification for that packet falls back to other methods (VLAN PCP or port-based default). The presence of an app table for DSCP value X with priority Y makes the hardware classify the packet to QoS class Y. As opposed to the default-prio where DSA exposes only a "set" in dsa_switch_ops (because the port-based default is the fallback, it always exists, either implicitly or explicitly), for DSCP priorities we expose an "add" and a "del". The addition of a DSCP entry means trusting that DSCP priority, the deletion means ignoring it. 
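For illustration (not part of the patch), trusting DSCP 46 as priority 5 corresponds to one dcbnl app table entry with the DSCP selector, which user space would add through the iproute2 dcb tool and which the kernel represents as:

        struct dcb_app app = {
                .selector = IEEE_8021QAZ_APP_SEL_DSCP,  /* DSCP selector */
                .protocol = 46,                         /* the DSCP value being trusted */
                .priority = 5,                          /* QoS class assigned to it */
        };

        dcb_ieee_setapp(netdev, &app);

The values above are only an example; deleting the same entry again means that DSCP value goes back to being ignored.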
Drivers that already trust (at least some) DSCP values can describe their configuration in dsa_switch_ops :: port_get_dscp_prio(), which is called for each DSCP value from 0 to 63. Again, there can be more than one dcbnl app table entry for the same DSCP value, DSA chooses the one with the largest configured priority. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 1220af73151b..9bfe984fcdbf 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -898,6 +898,11 @@ struct dsa_switch_ops { int (*port_get_default_prio)(struct dsa_switch *ds, int port); int (*port_set_default_prio)(struct dsa_switch *ds, int port, u8 prio); + int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); + int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); /* * Suspend and resume -- cgit v1.2.3 From 978777d0fb06663523281aa50a5040c3aa31fbe7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Mar 2022 23:15:20 +0200 Subject: net: dsa: felix: configure default-prio and dscp priorities Follow the established programming model for this driver and provide shims in the felix DSA driver which call the implementations from the ocelot switch lib. The ocelot switchdev driver wasn't integrated with dcbnl due to lack of hardware availability. The switch doesn't have any fancy QoS classification enabled by default. The provided getters will create a default-prio app table entry of 0, and no dscp entry. However, the getters have been made to actually retrieve the hardware configuration rather than static values, to be future proof in case DSA will need this information from more call paths. For default-prio, there is a single field per port, in ANA_PORT_QOS_CFG, called QOS_DEFAULT_VAL. DSCP classification is enabled per-port, again via ANA_PORT_QOS_CFG (field QOS_DSCP_ENA), and individual DSCP values are configured as trusted or not through register ANA_DSCP_CFG (replicated 64 times). An untrusted DSCP value falls back to other QoS classification methods. If trusted, the selected ANA_DSCP_CFG register also holds the QoS class in the QOS_DSCP_VAL field. The hardware also supports DSCP remapping (DSCP value X is translated to DSCP value Y before the QoS class is determined based on the app table entry for Y) and DSCP packet rewriting. The dcbnl framework, for being so flexible in other useless areas, doesn't appear to support this. So this functionality has been left out. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ee3c59639d70..4d51e2a7120f 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -869,6 +869,11 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); +int ocelot_port_get_default_prio(struct ocelot *ocelot, int port); +int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio); +int ocelot_port_get_dscp_prio(struct ocelot *ocelot, int port, u8 dscp); +int ocelot_port_add_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio); +int ocelot_port_del_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio); int ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge, int bridge_num, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:39:58 -0500 Subject: btrfs: add definition for EXTENT_TREE_V2 This adds the initial definition of the EXTENT_TREE_V2 incompat feature flag. This also hides the support behind CONFIG_BTRFS_DEBUG. THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A DEVELOPER OR A TESTER. The format is in flux and will be added in stages, any fs will need to be re-made between updates to the format. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 738619994e26..1cb1a3860f1d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) +#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) struct btrfs_ioctl_feature_flags { __u64 compat_flags; -- cgit v1.2.3 From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:40:07 -0500 Subject: btrfs: add code to support the block group root This code adds the on disk structures for the block group root, which will hold the block group items for extent tree v2. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0d729664b4b4..f068ff30d654 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -53,6 +53,7 @@ struct btrfs_space_info; { BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \ { BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \ { BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \ + { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\ { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }) #define show_root_type(obj) \ diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 5416f1f1a77a..b069752a8ecf 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -53,6 +53,9 @@ /* tracks free space in block groups. 
*/ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* Holds the block group items for extent tree v2. */ +#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL -- cgit v1.2.3 From 871129332d74c9e94bd110932ac4445833995639 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 4 Sep 2019 12:13:25 -0700 Subject: fs: export rw_verify_area() I'm adding btrfs ioctls to read and write compressed data, and rather than duplicating the checks in rw_verify_area(), let's just export it. Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2d892b201b0..0ebfc2519212 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3173,6 +3173,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); +int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From f6f7a25a650818c61defa97d60a79b216618315f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 12 Aug 2021 15:34:57 -0700 Subject: fs: export variant of generic_write_checks without iov_iter Encoded I/O in Btrfs needs to check a write with a given logical size without an iov_iter that matches that size (because the iov_iter we have is for the compressed data). So, factor out the parts of generic_write_check() that don't need an iov_iter into a new generic_write_checks_count() function and export that. Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ebfc2519212..27746a3da8fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); -- cgit v1.2.3 From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 15:58:29 -0700 Subject: btrfs: add definitions and documentation for encoded I/O ioctls In order to allow sending and receiving compressed data without decompressing it, we need an interface to write pre-compressed data directly to the filesystem and the matching interface to read compressed data without decompressing it. This adds the definitions for ioctls to do that and detailed explanations of how to use them. 
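A rough user-space sketch of the read side just described (illustrative only; fd is an open file descriptor, buf/buf_size an adequately sized buffer, error handling omitted; the structure and ioctl names are the ones defined in the uapi hunk that follows):

        struct btrfs_ioctl_encoded_io_args args = { 0 };
        struct iovec iov = { .iov_base = buf, .iov_len = buf_size };

        args.iov = &iov;
        args.iovcnt = 1;
        args.offset = 0;                 /* read the extent at the start of the file */

        long ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
        /* on success, ret is the encoded size, and args.len, args.unencoded_len,
         * args.unencoded_offset and args.compression say how to interpret it */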
Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 1cb1a3860f1d..d956b2993970 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args { __u8 align[7]; }; +/* + * Data and metadata for an encoded read or write. + * + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g., + * compression). This can be used to read the compressed contents of a file or + * write pre-compressed data directly to a file. + * + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially + * preadv/pwritev with additional metadata about how the data is encoded and the + * size of the unencoded data. + * + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills + * the metadata fields, and returns the size of the encoded data. It reads one + * extent per call. It can also read data which is not encoded. + * + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data + * from the iovecs, and returns the size of the encoded data. Note that the + * encoded data is not validated when it is written; if it is not valid (e.g., + * it cannot be decompressed), then a subsequent read may return an error. + * + * Since the filesystem page cache contains decoded data, encoded I/O bypasses + * the page cache. Encoded I/O requires CAP_SYS_ADMIN. + */ +struct btrfs_ioctl_encoded_io_args { + /* Input parameters for both reads and writes. */ + + /* + * iovecs containing encoded data. + * + * For reads, if the size of the encoded data is larger than the sum of + * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with + * ENOBUFS. + * + * For writes, the size of the encoded data is the sum of iov[n].iov_len + * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may + * increase in the future). This must also be less than or equal to + * unencoded_len. + */ + const struct iovec __user *iov; + /* Number of iovecs. */ + unsigned long iovcnt; + /* + * Offset in file. + * + * For writes, must be aligned to the sector size of the filesystem. + */ + __s64 offset; + /* Currently must be zero. */ + __u64 flags; + + /* + * For reads, the following members are output parameters that will + * contain the returned metadata for the encoded data. + * For writes, the following members must be set to the metadata for the + * encoded data. + */ + + /* + * Length of the data in the file. + * + * Must be less than or equal to unencoded_len - unencoded_offset. For + * writes, must be aligned to the sector size of the filesystem unless + * the data ends at or beyond the current end of the file. + */ + __u64 len; + /* + * Length of the unencoded (i.e., decrypted and decompressed) data. + * + * For writes, must be no more than 128 KiB (this limit may increase in + * the future). If the unencoded data is actually longer than + * unencoded_len, then it is truncated; if it is shorter, then it is + * extended with zeroes. + */ + __u64 unencoded_len; + /* + * Offset from the first byte of the unencoded data to the first byte of + * logical data in the file. + * + * Must be less than unencoded_len. + */ + __u64 unencoded_offset; + /* + * BTRFS_ENCODED_IO_COMPRESSION_* type. + * + * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE. 
+ */ + __u32 compression; + /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */ + __u32 encryption; + /* + * Reserved for future expansion. + * + * For reads, always returned as zero. Users should check for non-zero + * bytes. If there are any, then the kernel has a newer version of this + * structure with additional information that the user definition is + * missing. + * + * For writes, must be zeroed. + */ + __u8 reserved[64]; +}; + +/* Data is not compressed. */ +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0 +/* Data is compressed as a single zlib stream. */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1 +/* + * Data is compressed as a single zstd frame with the windowLog compression + * parameter set to no more than 17. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2 +/* + * Data is compressed sector by sector (using the sector size indicated by the + * name of the constant) with LZO1X and wrapped in the format documented in + * fs/btrfs/lzo.c. For writes, the compression sector size must match the + * filesystem sector size. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7 +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8 + +/* Data is not encrypted. */ +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -997,5 +1125,9 @@ enum btrfs_err_code { struct btrfs_ioctl_ino_lookup_user_args) #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From ec090a0392ff634e9304c19a8578d476a9e0c830 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 25 Feb 2022 16:46:56 +0200 Subject: mtd: core: Remove partid and partname debugfs files partid and partname debugfs files were used just by SPI NOR, but they were replaced by sysfs entries. Since these debugfs files are no longer used in mtd, remove dead code. The directory is kept as it is used by nandsim, mtdswap and docg3. Signed-off-by: Tudor Ambarus Reviewed-by: Pratyush Yadav Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220225144656.634682-1-tudor.ambarus@microchip.com --- include/linux/mtd/mtd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 1b3fc8c71ab3..151607e9d64a 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -188,9 +188,6 @@ struct module; /* only needed for owner field in mtd_info */ */ struct mtd_debug_info { struct dentry *dfs_dir; - - const char *partname; - const char *partid; }; /** -- cgit v1.2.3 From fc93db153b0187a9047f00bfd5cce75108530593 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 12 Mar 2022 13:45:05 -0800 Subject: net: disable preemption in dev_core_stats_XXX_inc() helpers syzbot was kind enough to remind us that dev->{tx_dropped|rx_dropped} could be increased in process context. 
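The splat below shows exactly that. As a sketch of the underlying rule (illustrative; the actual fix is the hunk further down): a pointer obtained with this_cpu_ptr() is only meaningful while the task cannot migrate, so the lookup and the update must sit inside a preempt_disable()/preempt_enable() pair:

        preempt_disable();                 /* pin the task to the current CPU */
        p = this_cpu_ptr(dev->core_stats);
        local_inc(&p->rx_dropped);         /* write through the per-CPU pointer */
        preempt_enable();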
BUG: using smp_processor_id() in preemptible [00000000] code: syz-executor413/3593 caller is netdev_core_stats_alloc+0x98/0x110 net/core/dev.c:10298 CPU: 1 PID: 3593 Comm: syz-executor413 Not tainted 5.17.0-rc7-syzkaller-02426-g97aeb877de7f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 check_preemption_disabled+0x16b/0x170 lib/smp_processor_id.c:49 netdev_core_stats_alloc+0x98/0x110 net/core/dev.c:10298 dev_core_stats include/linux/netdevice.h:3855 [inline] dev_core_stats_rx_dropped_inc include/linux/netdevice.h:3866 [inline] tun_get_user+0x3455/0x3ab0 drivers/net/tun.c:1800 tun_chr_write_iter+0xe1/0x200 drivers/net/tun.c:2015 call_write_iter include/linux/fs.h:2074 [inline] new_sync_write+0x431/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x12d/0x250 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f2cf4f887e3 Code: 5d 41 5c 41 5d 41 5e e9 9b fd ff ff 66 2e 0f 1f 84 00 00 00 00 00 90 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 55 c3 0f 1f 40 00 48 83 ec 28 48 89 54 24 18 RSP: 002b:00007ffd50dd5fd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ffd50dd6000 RCX: 00007f2cf4f887e3 RDX: 000000000000002a RSI: 0000000000000000 RDI: 00000000000000c8 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd50dd5ff0 R14: 00007ffd50dd5fe8 R15: 00007ffd50dd5fe4 Fixes: 625788b58445 ("net: add per-cpu storage and net->core_stats") Signed-off-by: Eric Dumazet Cc: jeffreyji Cc: Brian Vazquez Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20220312214505.3294762-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0d994710b335..8cbe96ce0a2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3858,10 +3858,14 @@ static inline struct net_device_core_stats *dev_core_stats(struct net_device *de #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats *p = dev_core_stats(dev); \ + struct net_device_core_stats *p; \ + \ + preempt_disable(); \ + p = dev_core_stats(dev); \ \ if (p) \ local_inc(&p->FIELD); \ + preempt_enable(); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) -- cgit v1.2.3 From c14c6843aeb8cdc8f6b0e49411d230e6f6dfda62 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:23 +0000 Subject: fs: read_mapping_page() should take a struct file argument While read_cache_page() takes a void *, because you can pass a pointer to anything as the first argument of filler_t, if we are calling read_mapping_page(), it will be passed as the first argument of ->readpage, so we know this must be a struct file pointer, and we should let the compiler enforce that for us. 
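A minimal sketch of what a call site looks like after this change (illustrative only; example_read_one() and its locals are made-up names, not part of the patch — the real prototype change is in the diff below):

static struct page *example_read_one(struct file *file, pgoff_t index)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;

	/*
	 * The third argument is now a typed struct file pointer; it is
	 * forwarded as the first argument of ->readpage.
	 */
	page = read_mapping_page(mapping, index, file);
	if (IS_ERR(page))
		return page;	/* ERR_PTR() on read failure, as before */

	return page;		/* caller releases the reference with put_page() */
}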
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 270bf5136c34..55a80d8f0e9c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -636,15 +636,15 @@ extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); static inline struct page *read_mapping_page(struct address_space *mapping, - pgoff_t index, void *data) + pgoff_t index, struct file *file) { - return read_cache_page(mapping, index, NULL, data); + return read_cache_page(mapping, index, NULL, file); } static inline struct folio *read_mapping_folio(struct address_space *mapping, - pgoff_t index, void *data) + pgoff_t index, struct file *file) { - return read_cache_folio(mapping, index, NULL, data); + return read_cache_folio(mapping, index, NULL, file); } /* -- cgit v1.2.3 From cd1067beeebfe23fc8cab071790fefb273962d51 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:26 +0000 Subject: buffer: Add folio_buffers() While there is no intent to use large folios in filesystems using buffer heads, converting the filesystems to use single-page folios is still worth doing to remove legacy infrastructure and hidden calls to compound_head(). These helper functions are needed for that conversion to take place. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/buffer_head.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 36f33685c8c0..3451f1fcda12 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -144,6 +144,7 @@ BUFFER_FNS(Defer_Completion, defer_completion) ((struct buffer_head *)page_private(page)); \ }) #define page_has_buffers(page) PagePrivate(page) +#define folio_buffers(folio) folio_get_private(folio) void buffer_check_dirty_writeback(struct page *page, bool *dirty, bool *writeback); -- cgit v1.2.3 From 2e7e80f7e7e9dbbb3c2a85ee923ca32826052816 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:27 +0000 Subject: fs: Convert is_partially_uptodate to folios Since the uptodate property is maintained on a per-folio basis, the is_partially_uptodate method should also take a folio. Fix the types at the same time so it's clear that it returns true/false and takes the count in bytes, not blocks. 
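A minimal sketch of the converted hook in use, assuming a made-up buffer_head based filesystem (the examplefs_* name is illustrative; block_is_partially_uptodate() is the generic helper whose new prototype appears in the diff below):

static bool examplefs_is_partially_uptodate(struct folio *folio,
					    size_t from, size_t count)
{
	/*
	 * True if the byte range [from, from + count) of the folio is
	 * already uptodate, so a read can be satisfied without I/O.
	 */
	return block_is_partially_uptodate(folio, from, count);
}

A filesystem would wire this up through the .is_partially_uptodate member of its struct address_space_operations.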
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/buffer_head.h | 3 +-- include/linux/fs.h | 4 ++-- include/linux/iomap.h | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3451f1fcda12..79d465057889 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,8 +225,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); -int block_is_partially_uptodate(struct page *page, unsigned long from, - unsigned long count); +bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, diff --git a/include/linux/fs.h b/include/linux/fs.h index e2d892b201b0..5939e6694ada 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -400,8 +400,8 @@ struct address_space_operations { bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, unsigned long, - unsigned long); + bool (*is_partially_uptodate) (struct folio *, size_t from, + size_t count); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 97a3a2edb585..3bcbb264f83f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -227,8 +227,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); -int iomap_is_partially_uptodate(struct page *page, unsigned long from, - unsigned long count); +bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); void iomap_invalidatepage(struct page *page, unsigned int offset, -- cgit v1.2.3 From aa1b46dcdc7baaf5fec0be25782ef24b26aa209e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 13 Mar 2022 21:15:02 -1000 Subject: block: fix rq-qos breakage from skipping rq_qos_done_bio() a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set. While this fixed a potential oops, it also broke blk-iocost by skipping the done_bio callback for merged bios. Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(), rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED distinguishing the former from the latter. After that change, rq_qos_done_bio() is no longer called for bios which went through rq_qos_merge(). This royally confuses blk-iocost as the merged bios never finish and are considered perpetually in-flight. One reliably reproducible failure mode is an intermediate cgroup getting stuck active, preventing its children from being activated due to the leaf-only rule, leading to loss of control.
The following is from resctl-bench protection scenario which emulates isolating a web server like workload from a memory bomb run on an iocost configuration which should yield a reasonable level of protection. # cat /sys/block/nvme2n1/device/model Samsung SSD 970 PRO 512GB # cat /sys/fs/cgroup/io.cost.model 259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025 # cat /sys/fs/cgroup/io.cost.qos 259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00 # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82 lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6 Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96% The isolation result of 58.12% is close to what this device would show without any IO control. Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and calling rq_qos_done_bio() on them too. For consistency and clarity, rename BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into rq_qos_done_bio() so that it's next to the code paths that set the flags. With the patch applied, the above same benchmark shows: # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81 lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68 Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0% Signed-off-by: Tejun Heo Fixes: a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") Cc: stable@vger.kernel.org # v5.15+ Cc: Ming Lei Cc: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5561e58d158a..0c3563b45fe9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -324,7 +324,8 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ -- cgit v1.2.3 From 338e953f1bd1a0ef4c86b5ab44735e1f6519d721 Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 16 Feb 2022 11:21:22 +0800 Subject: dt-bindings: pinctrl: mt8186: add pinctrl file and binding document 1. 
This patch adds pinctrl file for mt8186. 2. This patch adds mt8186 compatible node in binding document. Signed-off-by: Guodong Liu Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220216032124.28067-2-guodong.liu@mediatek.com Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/mt8186-pinfunc.h | 1174 ++++++++++++++++++++++++++ 1 file changed, 1174 insertions(+) create mode 100644 include/dt-bindings/pinctrl/mt8186-pinfunc.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/mt8186-pinfunc.h b/include/dt-bindings/pinctrl/mt8186-pinfunc.h new file mode 100644 index 000000000000..18d6683c6f65 --- /dev/null +++ b/include/dt-bindings/pinctrl/mt8186-pinfunc.h @@ -0,0 +1,1174 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2021 MediaTek Inc. + * Author: Guodong Liu + * + */ + +#ifndef __MT8186_PINFUNC_H +#define __MT8186_PINFUNC_H + +#include "mt65xx.h" + +#define PINMUX_GPIO0__FUNC_GPIO0 (MTK_PIN_NO(0) | 0) +#define PINMUX_GPIO0__FUNC_I2S0_MCK (MTK_PIN_NO(0) | 1) +#define PINMUX_GPIO0__FUNC_SPI0_CLK_B (MTK_PIN_NO(0) | 2) +#define PINMUX_GPIO0__FUNC_I2S2_MCK (MTK_PIN_NO(0) | 3) +#define PINMUX_GPIO0__FUNC_CMFLASH0 (MTK_PIN_NO(0) | 4) +#define PINMUX_GPIO0__FUNC_SCP_SPI0_CK (MTK_PIN_NO(0) | 5) +#define PINMUX_GPIO0__FUNC_TP_GPIO0_AO (MTK_PIN_NO(0) | 6) +#define PINMUX_GPIO0__FUNC_DBG_MON_A0 (MTK_PIN_NO(0) | 7) + +#define PINMUX_GPIO1__FUNC_GPIO1 (MTK_PIN_NO(1) | 0) +#define PINMUX_GPIO1__FUNC_I2S0_BCK (MTK_PIN_NO(1) | 1) +#define PINMUX_GPIO1__FUNC_SPI0_CSB_B (MTK_PIN_NO(1) | 2) +#define PINMUX_GPIO1__FUNC_I2S2_BCK (MTK_PIN_NO(1) | 3) +#define PINMUX_GPIO1__FUNC_CMFLASH1 (MTK_PIN_NO(1) | 4) +#define PINMUX_GPIO1__FUNC_SCP_SPI0_CS (MTK_PIN_NO(1) | 5) +#define PINMUX_GPIO1__FUNC_TP_GPIO1_AO (MTK_PIN_NO(1) | 6) + +#define PINMUX_GPIO2__FUNC_GPIO2 (MTK_PIN_NO(2) | 0) +#define PINMUX_GPIO2__FUNC_I2S0_LRCK (MTK_PIN_NO(2) | 1) +#define PINMUX_GPIO2__FUNC_SPI0_MO_B (MTK_PIN_NO(2) | 2) +#define PINMUX_GPIO2__FUNC_I2S2_LRCK (MTK_PIN_NO(2) | 3) +#define PINMUX_GPIO2__FUNC_CMFLASH2 (MTK_PIN_NO(2) | 4) +#define PINMUX_GPIO2__FUNC_SCP_SPI0_MO (MTK_PIN_NO(2) | 5) +#define PINMUX_GPIO2__FUNC_TP_GPIO2_AO (MTK_PIN_NO(2) | 6) + +#define PINMUX_GPIO3__FUNC_GPIO3 (MTK_PIN_NO(3) | 0) +#define PINMUX_GPIO3__FUNC_I2S0_DI (MTK_PIN_NO(3) | 1) +#define PINMUX_GPIO3__FUNC_SPI0_MI_B (MTK_PIN_NO(3) | 2) +#define PINMUX_GPIO3__FUNC_I2S2_DI (MTK_PIN_NO(3) | 3) +#define PINMUX_GPIO3__FUNC_SRCLKENAI1 (MTK_PIN_NO(3) | 4) +#define PINMUX_GPIO3__FUNC_SCP_SPI0_MI (MTK_PIN_NO(3) | 5) +#define PINMUX_GPIO3__FUNC_TP_GPIO3_AO (MTK_PIN_NO(3) | 6) + +#define PINMUX_GPIO4__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) +#define PINMUX_GPIO4__FUNC_I2S3_DO (MTK_PIN_NO(4) | 1) +#define PINMUX_GPIO4__FUNC_I2S1_DO (MTK_PIN_NO(4) | 3) +#define PINMUX_GPIO4__FUNC_TP_GPIO4_AO (MTK_PIN_NO(4) | 6) + +#define PINMUX_GPIO5__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) +#define PINMUX_GPIO5__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(5) | 1) +#define PINMUX_GPIO5__FUNC_TP_GPIO5_AO (MTK_PIN_NO(5) | 6) + +#define PINMUX_GPIO6__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) +#define PINMUX_GPIO6__FUNC_I2S3_MCK (MTK_PIN_NO(6) | 1) +#define PINMUX_GPIO6__FUNC_SPI1_CLK_B (MTK_PIN_NO(6) | 2) +#define PINMUX_GPIO6__FUNC_I2S1_MCK (MTK_PIN_NO(6) | 3) +#define PINMUX_GPIO6__FUNC_DPI_DATA22 (MTK_PIN_NO(6) | 4) +#define PINMUX_GPIO6__FUNC_TP_GPIO6_AO (MTK_PIN_NO(6) | 6) + +#define PINMUX_GPIO7__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) +#define PINMUX_GPIO7__FUNC_I2S3_BCK (MTK_PIN_NO(7) | 1) +#define PINMUX_GPIO7__FUNC_SPI1_CSB_B (MTK_PIN_NO(7) | 2) +#define 
PINMUX_GPIO7__FUNC_I2S1_BCK (MTK_PIN_NO(7) | 3) +#define PINMUX_GPIO7__FUNC_DPI_DATA23 (MTK_PIN_NO(7) | 4) +#define PINMUX_GPIO7__FUNC_TP_GPIO7_AO (MTK_PIN_NO(7) | 6) + +#define PINMUX_GPIO8__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) +#define PINMUX_GPIO8__FUNC_I2S3_LRCK (MTK_PIN_NO(8) | 1) +#define PINMUX_GPIO8__FUNC_SPI1_MO_B (MTK_PIN_NO(8) | 2) +#define PINMUX_GPIO8__FUNC_I2S1_LRCK (MTK_PIN_NO(8) | 3) +#define PINMUX_GPIO8__FUNC_CONN_UART0_RXD (MTK_PIN_NO(8) | 4) +#define PINMUX_GPIO8__FUNC_SSPM_URXD_AO (MTK_PIN_NO(8) | 5) +#define PINMUX_GPIO8__FUNC_ADSP_UART_RX (MTK_PIN_NO(8) | 6) +#define PINMUX_GPIO8__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(8) | 7) + +#define PINMUX_GPIO9__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) +#define PINMUX_GPIO9__FUNC_I2S3_DO (MTK_PIN_NO(9) | 1) +#define PINMUX_GPIO9__FUNC_SPI1_MI_B (MTK_PIN_NO(9) | 2) +#define PINMUX_GPIO9__FUNC_I2S1_DO (MTK_PIN_NO(9) | 3) +#define PINMUX_GPIO9__FUNC_CONN_UART0_TXD (MTK_PIN_NO(9) | 4) +#define PINMUX_GPIO9__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(9) | 5) +#define PINMUX_GPIO9__FUNC_ADSP_UART_TX (MTK_PIN_NO(9) | 6) +#define PINMUX_GPIO9__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(9) | 7) + +#define PINMUX_GPIO10__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) +#define PINMUX_GPIO10__FUNC_I2S0_MCK (MTK_PIN_NO(10) | 1) +#define PINMUX_GPIO10__FUNC_SPI4_CLK_A (MTK_PIN_NO(10) | 2) +#define PINMUX_GPIO10__FUNC_I2S2_MCK (MTK_PIN_NO(10) | 3) +#define PINMUX_GPIO10__FUNC_SPM_JTAG_TDI (MTK_PIN_NO(10) | 4) +#define PINMUX_GPIO10__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(10) | 5) +#define PINMUX_GPIO10__FUNC_ADSP_JTAG_TDI (MTK_PIN_NO(10) | 6) +#define PINMUX_GPIO10__FUNC_CONN_MCU_TDI (MTK_PIN_NO(10) | 7) + +#define PINMUX_GPIO11__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) +#define PINMUX_GPIO11__FUNC_I2S0_BCK (MTK_PIN_NO(11) | 1) +#define PINMUX_GPIO11__FUNC_SPI4_CSB_A (MTK_PIN_NO(11) | 2) +#define PINMUX_GPIO11__FUNC_I2S2_BCK (MTK_PIN_NO(11) | 3) +#define PINMUX_GPIO11__FUNC_SPM_JTAG_TRSTN (MTK_PIN_NO(11) | 4) +#define PINMUX_GPIO11__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(11) | 5) +#define PINMUX_GPIO11__FUNC_ADSP_JTAG_TRSTN (MTK_PIN_NO(11) | 6) +#define PINMUX_GPIO11__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(11) | 7) + +#define PINMUX_GPIO12__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) +#define PINMUX_GPIO12__FUNC_I2S0_LRCK (MTK_PIN_NO(12) | 1) +#define PINMUX_GPIO12__FUNC_SPI4_MO_A (MTK_PIN_NO(12) | 2) +#define PINMUX_GPIO12__FUNC_I2S2_LRCK (MTK_PIN_NO(12) | 3) +#define PINMUX_GPIO12__FUNC_SPM_JTAG_TCK (MTK_PIN_NO(12) | 4) +#define PINMUX_GPIO12__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(12) | 5) +#define PINMUX_GPIO12__FUNC_ADSP_JTAG_TCK (MTK_PIN_NO(12) | 6) +#define PINMUX_GPIO12__FUNC_CONN_MCU_TCK (MTK_PIN_NO(12) | 7) + +#define PINMUX_GPIO13__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) +#define PINMUX_GPIO13__FUNC_I2S0_DI (MTK_PIN_NO(13) | 1) +#define PINMUX_GPIO13__FUNC_SPI4_MI_A (MTK_PIN_NO(13) | 2) +#define PINMUX_GPIO13__FUNC_I2S2_DI (MTK_PIN_NO(13) | 3) +#define PINMUX_GPIO13__FUNC_SPM_JTAG_TDO (MTK_PIN_NO(13) | 4) +#define PINMUX_GPIO13__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(13) | 5) +#define PINMUX_GPIO13__FUNC_ADSP_JTAG_TDO (MTK_PIN_NO(13) | 6) +#define PINMUX_GPIO13__FUNC_CONN_MCU_TDO (MTK_PIN_NO(13) | 7) + +#define PINMUX_GPIO14__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) +#define PINMUX_GPIO14__FUNC_CLKM0 (MTK_PIN_NO(14) | 3) +#define PINMUX_GPIO14__FUNC_SPM_JTAG_TMS (MTK_PIN_NO(14) | 4) +#define PINMUX_GPIO14__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(14) | 5) +#define PINMUX_GPIO14__FUNC_ADSP_JTAG_TMS (MTK_PIN_NO(14) | 6) +#define PINMUX_GPIO14__FUNC_CONN_MCU_TMS (MTK_PIN_NO(14) | 7) + +#define PINMUX_GPIO15__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) +#define 
PINMUX_GPIO15__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(15) | 1) +#define PINMUX_GPIO15__FUNC_SRCLKENAI1 (MTK_PIN_NO(15) | 2) +#define PINMUX_GPIO15__FUNC_CLKM1 (MTK_PIN_NO(15) | 3) +#define PINMUX_GPIO15__FUNC_PWM0 (MTK_PIN_NO(15) | 4) + +#define PINMUX_GPIO16__FUNC_GPIO16 (MTK_PIN_NO(16) | 0) +#define PINMUX_GPIO16__FUNC_CONN_WIFI_TXD (MTK_PIN_NO(16) | 1) +#define PINMUX_GPIO16__FUNC_SRCLKENAI0 (MTK_PIN_NO(16) | 2) +#define PINMUX_GPIO16__FUNC_CLKM2 (MTK_PIN_NO(16) | 3) +#define PINMUX_GPIO16__FUNC_PWM1 (MTK_PIN_NO(16) | 4) + +#define PINMUX_GPIO17__FUNC_GPIO17 (MTK_PIN_NO(17) | 0) +#define PINMUX_GPIO17__FUNC_CLKM3 (MTK_PIN_NO(17) | 3) +#define PINMUX_GPIO17__FUNC_PWM2 (MTK_PIN_NO(17) | 4) +#define PINMUX_GPIO17__FUNC_DBG_MON_A32 (MTK_PIN_NO(17) | 7) + +#define PINMUX_GPIO18__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) +#define PINMUX_GPIO18__FUNC_CMVREF0 (MTK_PIN_NO(18) | 2) +#define PINMUX_GPIO18__FUNC_SPI2_CLK_B (MTK_PIN_NO(18) | 6) +#define PINMUX_GPIO18__FUNC_DBG_MON_A26 (MTK_PIN_NO(18) | 7) + +#define PINMUX_GPIO19__FUNC_GPIO19 (MTK_PIN_NO(19) | 0) +#define PINMUX_GPIO19__FUNC_CMVREF1 (MTK_PIN_NO(19) | 2) +#define PINMUX_GPIO19__FUNC_ANT_SEL3 (MTK_PIN_NO(19) | 5) +#define PINMUX_GPIO19__FUNC_SPI2_CSB_B (MTK_PIN_NO(19) | 6) +#define PINMUX_GPIO19__FUNC_DBG_MON_A2 (MTK_PIN_NO(19) | 7) + +#define PINMUX_GPIO20__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) +#define PINMUX_GPIO20__FUNC_CMVREF2 (MTK_PIN_NO(20) | 2) +#define PINMUX_GPIO20__FUNC_ANT_SEL4 (MTK_PIN_NO(20) | 5) +#define PINMUX_GPIO20__FUNC_SPI2_MO_B (MTK_PIN_NO(20) | 6) +#define PINMUX_GPIO20__FUNC_DBG_MON_A3 (MTK_PIN_NO(20) | 7) + +#define PINMUX_GPIO21__FUNC_GPIO21 (MTK_PIN_NO(21) | 0) +#define PINMUX_GPIO21__FUNC_I2S0_MCK (MTK_PIN_NO(21) | 1) +#define PINMUX_GPIO21__FUNC_I2S1_MCK (MTK_PIN_NO(21) | 2) +#define PINMUX_GPIO21__FUNC_I2S3_MCK (MTK_PIN_NO(21) | 3) +#define PINMUX_GPIO21__FUNC_ANT_SEL5 (MTK_PIN_NO(21) | 5) +#define PINMUX_GPIO21__FUNC_SPI2_MI_B (MTK_PIN_NO(21) | 6) +#define PINMUX_GPIO21__FUNC_DBG_MON_A4 (MTK_PIN_NO(21) | 7) + +#define PINMUX_GPIO22__FUNC_GPIO22 (MTK_PIN_NO(22) | 0) +#define PINMUX_GPIO22__FUNC_I2S0_BCK (MTK_PIN_NO(22) | 1) +#define PINMUX_GPIO22__FUNC_I2S1_BCK (MTK_PIN_NO(22) | 2) +#define PINMUX_GPIO22__FUNC_I2S3_BCK (MTK_PIN_NO(22) | 3) +#define PINMUX_GPIO22__FUNC_TDM_RX_LRCK (MTK_PIN_NO(22) | 4) +#define PINMUX_GPIO22__FUNC_ANT_SEL6 (MTK_PIN_NO(22) | 5) +#define PINMUX_GPIO22__FUNC_DBG_MON_A5 (MTK_PIN_NO(22) | 7) + +#define PINMUX_GPIO23__FUNC_GPIO23 (MTK_PIN_NO(23) | 0) +#define PINMUX_GPIO23__FUNC_I2S0_LRCK (MTK_PIN_NO(23) | 1) +#define PINMUX_GPIO23__FUNC_I2S1_LRCK (MTK_PIN_NO(23) | 2) +#define PINMUX_GPIO23__FUNC_I2S3_LRCK (MTK_PIN_NO(23) | 3) +#define PINMUX_GPIO23__FUNC_TDM_RX_BCK (MTK_PIN_NO(23) | 4) +#define PINMUX_GPIO23__FUNC_ANT_SEL7 (MTK_PIN_NO(23) | 5) +#define PINMUX_GPIO23__FUNC_DBG_MON_A6 (MTK_PIN_NO(23) | 7) + +#define PINMUX_GPIO24__FUNC_GPIO24 (MTK_PIN_NO(24) | 0) +#define PINMUX_GPIO24__FUNC_I2S0_DI (MTK_PIN_NO(24) | 1) +#define PINMUX_GPIO24__FUNC_I2S1_DO (MTK_PIN_NO(24) | 2) +#define PINMUX_GPIO24__FUNC_I2S3_DO (MTK_PIN_NO(24) | 3) +#define PINMUX_GPIO24__FUNC_TDM_RX_MCK (MTK_PIN_NO(24) | 4) +#define PINMUX_GPIO24__FUNC_DBG_MON_A7 (MTK_PIN_NO(24) | 7) + +#define PINMUX_GPIO25__FUNC_GPIO25 (MTK_PIN_NO(25) | 0) +#define PINMUX_GPIO25__FUNC_I2S2_MCK (MTK_PIN_NO(25) | 1) +#define PINMUX_GPIO25__FUNC_PCM_CLK (MTK_PIN_NO(25) | 2) +#define PINMUX_GPIO25__FUNC_SPI4_CLK_B (MTK_PIN_NO(25) | 3) +#define PINMUX_GPIO25__FUNC_TDM_RX_DATA0 (MTK_PIN_NO(25) | 4) +#define PINMUX_GPIO25__FUNC_DBG_MON_A8 (MTK_PIN_NO(25) 
| 7) + +#define PINMUX_GPIO26__FUNC_GPIO26 (MTK_PIN_NO(26) | 0) +#define PINMUX_GPIO26__FUNC_I2S2_BCK (MTK_PIN_NO(26) | 1) +#define PINMUX_GPIO26__FUNC_PCM_SYNC (MTK_PIN_NO(26) | 2) +#define PINMUX_GPIO26__FUNC_SPI4_CSB_B (MTK_PIN_NO(26) | 3) +#define PINMUX_GPIO26__FUNC_TDM_RX_DATA1 (MTK_PIN_NO(26) | 4) +#define PINMUX_GPIO26__FUNC_DBG_MON_A9 (MTK_PIN_NO(26) | 7) + +#define PINMUX_GPIO27__FUNC_GPIO27 (MTK_PIN_NO(27) | 0) +#define PINMUX_GPIO27__FUNC_I2S2_LRCK (MTK_PIN_NO(27) | 1) +#define PINMUX_GPIO27__FUNC_PCM_DI (MTK_PIN_NO(27) | 2) +#define PINMUX_GPIO27__FUNC_SPI4_MO_B (MTK_PIN_NO(27) | 3) +#define PINMUX_GPIO27__FUNC_TDM_RX_DATA2 (MTK_PIN_NO(27) | 4) +#define PINMUX_GPIO27__FUNC_DBG_MON_A10 (MTK_PIN_NO(27) | 7) + +#define PINMUX_GPIO28__FUNC_GPIO28 (MTK_PIN_NO(28) | 0) +#define PINMUX_GPIO28__FUNC_I2S2_DI (MTK_PIN_NO(28) | 1) +#define PINMUX_GPIO28__FUNC_PCM_DO (MTK_PIN_NO(28) | 2) +#define PINMUX_GPIO28__FUNC_SPI4_MI_B (MTK_PIN_NO(28) | 3) +#define PINMUX_GPIO28__FUNC_TDM_RX_DATA3 (MTK_PIN_NO(28) | 4) + +#define PINMUX_GPIO29__FUNC_GPIO29 (MTK_PIN_NO(29) | 0) +#define PINMUX_GPIO29__FUNC_ANT_SEL0 (MTK_PIN_NO(29) | 1) +#define PINMUX_GPIO29__FUNC_GPS_L1_ELNA_EN (MTK_PIN_NO(29) | 2) + +#define PINMUX_GPIO30__FUNC_GPIO30 (MTK_PIN_NO(30) | 0) +#define PINMUX_GPIO30__FUNC_ANT_SEL1 (MTK_PIN_NO(30) | 1) + +#define PINMUX_GPIO31__FUNC_GPIO31 (MTK_PIN_NO(31) | 0) +#define PINMUX_GPIO31__FUNC_ANT_SEL2 (MTK_PIN_NO(31) | 1) +#define PINMUX_GPIO31__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(31) | 2) +#define PINMUX_GPIO31__FUNC_SRCLKENAI1 (MTK_PIN_NO(31) | 3) + +#define PINMUX_GPIO32__FUNC_GPIO32 (MTK_PIN_NO(32) | 0) +#define PINMUX_GPIO32__FUNC_URXD0 (MTK_PIN_NO(32) | 1) +#define PINMUX_GPIO32__FUNC_UTXD0 (MTK_PIN_NO(32) | 2) +#define PINMUX_GPIO32__FUNC_ADSP_UART_RX (MTK_PIN_NO(32) | 3) +#define PINMUX_GPIO32__FUNC_TP_URXD1_AO (MTK_PIN_NO(32) | 4) + +#define PINMUX_GPIO33__FUNC_GPIO33 (MTK_PIN_NO(33) | 0) +#define PINMUX_GPIO33__FUNC_UTXD0 (MTK_PIN_NO(33) | 1) +#define PINMUX_GPIO33__FUNC_URXD0 (MTK_PIN_NO(33) | 2) +#define PINMUX_GPIO33__FUNC_ADSP_UART_TX (MTK_PIN_NO(33) | 3) +#define PINMUX_GPIO33__FUNC_TP_UTXD1_AO (MTK_PIN_NO(33) | 4) + +#define PINMUX_GPIO34__FUNC_GPIO34 (MTK_PIN_NO(34) | 0) +#define PINMUX_GPIO34__FUNC_URXD1 (MTK_PIN_NO(34) | 1) +#define PINMUX_GPIO34__FUNC_TP_URXD2_AO (MTK_PIN_NO(34) | 2) +#define PINMUX_GPIO34__FUNC_SSPM_URXD_AO (MTK_PIN_NO(34) | 3) +#define PINMUX_GPIO34__FUNC_ADSP_UART_RX (MTK_PIN_NO(34) | 4) +#define PINMUX_GPIO34__FUNC_CONN_UART0_RXD (MTK_PIN_NO(34) | 5) + +#define PINMUX_GPIO35__FUNC_GPIO35 (MTK_PIN_NO(35) | 0) +#define PINMUX_GPIO35__FUNC_UTXD1 (MTK_PIN_NO(35) | 1) +#define PINMUX_GPIO35__FUNC_TP_UTXD2_AO (MTK_PIN_NO(35) | 2) +#define PINMUX_GPIO35__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(35) | 3) +#define PINMUX_GPIO35__FUNC_ADSP_UART_TX (MTK_PIN_NO(35) | 4) +#define PINMUX_GPIO35__FUNC_CONN_UART0_TXD (MTK_PIN_NO(35) | 5) +#define PINMUX_GPIO35__FUNC_CONN_WIFI_TXD (MTK_PIN_NO(35) | 6) + +#define PINMUX_GPIO36__FUNC_GPIO36 (MTK_PIN_NO(36) | 0) +#define PINMUX_GPIO36__FUNC_SPI0_CLK_A (MTK_PIN_NO(36) | 1) +#define PINMUX_GPIO36__FUNC_CLKM0 (MTK_PIN_NO(36) | 2) +#define PINMUX_GPIO36__FUNC_SCP_SPI0_CK (MTK_PIN_NO(36) | 4) +#define PINMUX_GPIO36__FUNC_SPINOR_CK (MTK_PIN_NO(36) | 5) +#define PINMUX_GPIO36__FUNC_DBG_MON_A11 (MTK_PIN_NO(36) | 7) + +#define PINMUX_GPIO37__FUNC_GPIO37 (MTK_PIN_NO(37) | 0) +#define PINMUX_GPIO37__FUNC_SPI0_CSB_A (MTK_PIN_NO(37) | 1) +#define PINMUX_GPIO37__FUNC_CLKM1 (MTK_PIN_NO(37) | 2) +#define PINMUX_GPIO37__FUNC_PWM0 (MTK_PIN_NO(37) | 3) 
+#define PINMUX_GPIO37__FUNC_SCP_SPI0_CS (MTK_PIN_NO(37) | 4) +#define PINMUX_GPIO37__FUNC_SPINOR_CS (MTK_PIN_NO(37) | 5) +#define PINMUX_GPIO37__FUNC_DBG_MON_A12 (MTK_PIN_NO(37) | 7) + +#define PINMUX_GPIO38__FUNC_GPIO38 (MTK_PIN_NO(38) | 0) +#define PINMUX_GPIO38__FUNC_SPI0_MO_A (MTK_PIN_NO(38) | 1) +#define PINMUX_GPIO38__FUNC_CLKM2 (MTK_PIN_NO(38) | 2) +#define PINMUX_GPIO38__FUNC_PWM1 (MTK_PIN_NO(38) | 3) +#define PINMUX_GPIO38__FUNC_SCP_SPI0_MO (MTK_PIN_NO(38) | 4) +#define PINMUX_GPIO38__FUNC_SPINOR_IO0 (MTK_PIN_NO(38) | 5) +#define PINMUX_GPIO38__FUNC_DBG_MON_A13 (MTK_PIN_NO(38) | 7) + +#define PINMUX_GPIO39__FUNC_GPIO39 (MTK_PIN_NO(39) | 0) +#define PINMUX_GPIO39__FUNC_SPI0_MI_A (MTK_PIN_NO(39) | 1) +#define PINMUX_GPIO39__FUNC_CLKM3 (MTK_PIN_NO(39) | 2) +#define PINMUX_GPIO39__FUNC_PWM2 (MTK_PIN_NO(39) | 3) +#define PINMUX_GPIO39__FUNC_SCP_SPI0_MI (MTK_PIN_NO(39) | 4) +#define PINMUX_GPIO39__FUNC_SPINOR_IO1 (MTK_PIN_NO(39) | 5) +#define PINMUX_GPIO39__FUNC_DBG_MON_A14 (MTK_PIN_NO(39) | 7) + +#define PINMUX_GPIO40__FUNC_GPIO40 (MTK_PIN_NO(40) | 0) +#define PINMUX_GPIO40__FUNC_SPI1_CLK_A (MTK_PIN_NO(40) | 1) +#define PINMUX_GPIO40__FUNC_SCP_SPI1_CK (MTK_PIN_NO(40) | 2) +#define PINMUX_GPIO40__FUNC_UCTS0 (MTK_PIN_NO(40) | 4) +#define PINMUX_GPIO40__FUNC_SPINOR_IO2 (MTK_PIN_NO(40) | 5) +#define PINMUX_GPIO40__FUNC_TP_UCTS1_AO (MTK_PIN_NO(40) | 6) +#define PINMUX_GPIO40__FUNC_DBG_MON_A15 (MTK_PIN_NO(40) | 7) + +#define PINMUX_GPIO41__FUNC_GPIO41 (MTK_PIN_NO(41) | 0) +#define PINMUX_GPIO41__FUNC_SPI1_CSB_A (MTK_PIN_NO(41) | 1) +#define PINMUX_GPIO41__FUNC_SCP_SPI1_CS (MTK_PIN_NO(41) | 2) +#define PINMUX_GPIO41__FUNC_PWM0 (MTK_PIN_NO(41) | 3) +#define PINMUX_GPIO41__FUNC_URTS0 (MTK_PIN_NO(41) | 4) +#define PINMUX_GPIO41__FUNC_SPINOR_IO3 (MTK_PIN_NO(41) | 5) +#define PINMUX_GPIO41__FUNC_TP_URTS1_AO (MTK_PIN_NO(41) | 6) +#define PINMUX_GPIO41__FUNC_DBG_MON_A16 (MTK_PIN_NO(41) | 7) + +#define PINMUX_GPIO42__FUNC_GPIO42 (MTK_PIN_NO(42) | 0) +#define PINMUX_GPIO42__FUNC_SPI1_MO_A (MTK_PIN_NO(42) | 1) +#define PINMUX_GPIO42__FUNC_SCP_SPI1_MO (MTK_PIN_NO(42) | 2) +#define PINMUX_GPIO42__FUNC_PWM1 (MTK_PIN_NO(42) | 3) +#define PINMUX_GPIO42__FUNC_UCTS1 (MTK_PIN_NO(42) | 4) +#define PINMUX_GPIO42__FUNC_TP_UCTS2_AO (MTK_PIN_NO(42) | 6) +#define PINMUX_GPIO42__FUNC_DBG_MON_A17 (MTK_PIN_NO(42) | 7) + +#define PINMUX_GPIO43__FUNC_GPIO43 (MTK_PIN_NO(43) | 0) +#define PINMUX_GPIO43__FUNC_SPI1_MI_A (MTK_PIN_NO(43) | 1) +#define PINMUX_GPIO43__FUNC_SCP_SPI1_MI (MTK_PIN_NO(43) | 2) +#define PINMUX_GPIO43__FUNC_PWM2 (MTK_PIN_NO(43) | 3) +#define PINMUX_GPIO43__FUNC_URTS1 (MTK_PIN_NO(43) | 4) +#define PINMUX_GPIO43__FUNC_TP_URTS2_AO (MTK_PIN_NO(43) | 6) +#define PINMUX_GPIO43__FUNC_DBG_MON_A18 (MTK_PIN_NO(43) | 7) + +#define PINMUX_GPIO44__FUNC_GPIO44 (MTK_PIN_NO(44) | 0) +#define PINMUX_GPIO44__FUNC_SPI2_CLK_A (MTK_PIN_NO(44) | 1) +#define PINMUX_GPIO44__FUNC_SCP_SPI0_CK (MTK_PIN_NO(44) | 2) +#define PINMUX_GPIO44__FUNC_DBG_MON_A19 (MTK_PIN_NO(44) | 7) + +#define PINMUX_GPIO45__FUNC_GPIO45 (MTK_PIN_NO(45) | 0) +#define PINMUX_GPIO45__FUNC_SPI2_CSB_A (MTK_PIN_NO(45) | 1) +#define PINMUX_GPIO45__FUNC_SCP_SPI0_CS (MTK_PIN_NO(45) | 2) +#define PINMUX_GPIO45__FUNC_DBG_MON_A20 (MTK_PIN_NO(45) | 7) + +#define PINMUX_GPIO46__FUNC_GPIO46 (MTK_PIN_NO(46) | 0) +#define PINMUX_GPIO46__FUNC_SPI2_MO_A (MTK_PIN_NO(46) | 1) +#define PINMUX_GPIO46__FUNC_SCP_SPI0_MO (MTK_PIN_NO(46) | 2) +#define PINMUX_GPIO46__FUNC_DBG_MON_A21 (MTK_PIN_NO(46) | 7) + +#define PINMUX_GPIO47__FUNC_GPIO47 (MTK_PIN_NO(47) | 0) +#define 
PINMUX_GPIO47__FUNC_SPI2_MI_A (MTK_PIN_NO(47) | 1) +#define PINMUX_GPIO47__FUNC_SCP_SPI0_MI (MTK_PIN_NO(47) | 2) +#define PINMUX_GPIO47__FUNC_DBG_MON_A22 (MTK_PIN_NO(47) | 7) + +#define PINMUX_GPIO48__FUNC_GPIO48 (MTK_PIN_NO(48) | 0) +#define PINMUX_GPIO48__FUNC_SPI3_CLK (MTK_PIN_NO(48) | 1) +#define PINMUX_GPIO48__FUNC_TP_URXD1_AO (MTK_PIN_NO(48) | 2) +#define PINMUX_GPIO48__FUNC_TP_URXD2_AO (MTK_PIN_NO(48) | 3) +#define PINMUX_GPIO48__FUNC_URXD1 (MTK_PIN_NO(48) | 4) +#define PINMUX_GPIO48__FUNC_I2S2_MCK (MTK_PIN_NO(48) | 5) +#define PINMUX_GPIO48__FUNC_SCP_SPI0_CK (MTK_PIN_NO(48) | 6) + +#define PINMUX_GPIO49__FUNC_GPIO49 (MTK_PIN_NO(49) | 0) +#define PINMUX_GPIO49__FUNC_SPI3_CSB (MTK_PIN_NO(49) | 1) +#define PINMUX_GPIO49__FUNC_TP_UTXD1_AO (MTK_PIN_NO(49) | 2) +#define PINMUX_GPIO49__FUNC_TP_UTXD2_AO (MTK_PIN_NO(49) | 3) +#define PINMUX_GPIO49__FUNC_UTXD1 (MTK_PIN_NO(49) | 4) +#define PINMUX_GPIO49__FUNC_I2S2_BCK (MTK_PIN_NO(49) | 5) +#define PINMUX_GPIO49__FUNC_SCP_SPI0_CS (MTK_PIN_NO(49) | 6) + +#define PINMUX_GPIO50__FUNC_GPIO50 (MTK_PIN_NO(50) | 0) +#define PINMUX_GPIO50__FUNC_SPI3_MO (MTK_PIN_NO(50) | 1) +#define PINMUX_GPIO50__FUNC_I2S2_LRCK (MTK_PIN_NO(50) | 5) +#define PINMUX_GPIO50__FUNC_SCP_SPI0_MO (MTK_PIN_NO(50) | 6) + +#define PINMUX_GPIO51__FUNC_GPIO51 (MTK_PIN_NO(51) | 0) +#define PINMUX_GPIO51__FUNC_SPI3_MI (MTK_PIN_NO(51) | 1) +#define PINMUX_GPIO51__FUNC_I2S2_DI (MTK_PIN_NO(51) | 5) +#define PINMUX_GPIO51__FUNC_SCP_SPI0_MI (MTK_PIN_NO(51) | 6) + +#define PINMUX_GPIO52__FUNC_GPIO52 (MTK_PIN_NO(52) | 0) +#define PINMUX_GPIO52__FUNC_SPI5_CLK (MTK_PIN_NO(52) | 1) +#define PINMUX_GPIO52__FUNC_I2S2_MCK (MTK_PIN_NO(52) | 2) +#define PINMUX_GPIO52__FUNC_I2S1_MCK (MTK_PIN_NO(52) | 3) +#define PINMUX_GPIO52__FUNC_SCP_SPI1_CK (MTK_PIN_NO(52) | 4) +#define PINMUX_GPIO52__FUNC_LVTS_26M (MTK_PIN_NO(52) | 5) +#define PINMUX_GPIO52__FUNC_DFD_TCK_XI (MTK_PIN_NO(52) | 6) +#define PINMUX_GPIO52__FUNC_DBG_MON_B30 (MTK_PIN_NO(52) | 7) + +#define PINMUX_GPIO53__FUNC_GPIO53 (MTK_PIN_NO(53) | 0) +#define PINMUX_GPIO53__FUNC_SPI5_CSB (MTK_PIN_NO(53) | 1) +#define PINMUX_GPIO53__FUNC_I2S2_BCK (MTK_PIN_NO(53) | 2) +#define PINMUX_GPIO53__FUNC_I2S1_BCK (MTK_PIN_NO(53) | 3) +#define PINMUX_GPIO53__FUNC_SCP_SPI1_CS (MTK_PIN_NO(53) | 4) +#define PINMUX_GPIO53__FUNC_LVTS_FOUT (MTK_PIN_NO(53) | 5) +#define PINMUX_GPIO53__FUNC_DFD_TDI (MTK_PIN_NO(53) | 6) +#define PINMUX_GPIO53__FUNC_DBG_MON_B31 (MTK_PIN_NO(53) | 7) + +#define PINMUX_GPIO54__FUNC_GPIO54 (MTK_PIN_NO(54) | 0) +#define PINMUX_GPIO54__FUNC_SPI5_MO (MTK_PIN_NO(54) | 1) +#define PINMUX_GPIO54__FUNC_I2S2_LRCK (MTK_PIN_NO(54) | 2) +#define PINMUX_GPIO54__FUNC_I2S1_LRCK (MTK_PIN_NO(54) | 3) +#define PINMUX_GPIO54__FUNC_SCP_SPI1_MO (MTK_PIN_NO(54) | 4) +#define PINMUX_GPIO54__FUNC_LVTS_SCK (MTK_PIN_NO(54) | 5) +#define PINMUX_GPIO54__FUNC_DFD_TDO (MTK_PIN_NO(54) | 6) +#define PINMUX_GPIO54__FUNC_DBG_MON_A1 (MTK_PIN_NO(54) | 7) + +#define PINMUX_GPIO55__FUNC_GPIO55 (MTK_PIN_NO(55) | 0) +#define PINMUX_GPIO55__FUNC_SPI5_MI (MTK_PIN_NO(55) | 1) +#define PINMUX_GPIO55__FUNC_I2S2_DI (MTK_PIN_NO(55) | 2) +#define PINMUX_GPIO55__FUNC_I2S1_DO (MTK_PIN_NO(55) | 3) +#define PINMUX_GPIO55__FUNC_SCP_SPI1_MI (MTK_PIN_NO(55) | 4) +#define PINMUX_GPIO55__FUNC_LVTS_SDO (MTK_PIN_NO(55) | 5) +#define PINMUX_GPIO55__FUNC_DFD_TMS (MTK_PIN_NO(55) | 6) +#define PINMUX_GPIO55__FUNC_DBG_MON_B32 (MTK_PIN_NO(55) | 7) + +#define PINMUX_GPIO56__FUNC_GPIO56 (MTK_PIN_NO(56) | 0) +#define PINMUX_GPIO56__FUNC_I2S1_DO (MTK_PIN_NO(56) | 1) +#define PINMUX_GPIO56__FUNC_I2S3_DO 
(MTK_PIN_NO(56) | 2) +#define PINMUX_GPIO56__FUNC_DBG_MON_A23 (MTK_PIN_NO(56) | 7) + +#define PINMUX_GPIO57__FUNC_GPIO57 (MTK_PIN_NO(57) | 0) +#define PINMUX_GPIO57__FUNC_I2S1_BCK (MTK_PIN_NO(57) | 1) +#define PINMUX_GPIO57__FUNC_I2S3_BCK (MTK_PIN_NO(57) | 2) +#define PINMUX_GPIO57__FUNC_DBG_MON_A24 (MTK_PIN_NO(57) | 7) + +#define PINMUX_GPIO58__FUNC_GPIO58 (MTK_PIN_NO(58) | 0) +#define PINMUX_GPIO58__FUNC_I2S1_LRCK (MTK_PIN_NO(58) | 1) +#define PINMUX_GPIO58__FUNC_I2S3_LRCK (MTK_PIN_NO(58) | 2) +#define PINMUX_GPIO58__FUNC_DBG_MON_A25 (MTK_PIN_NO(58) | 7) + +#define PINMUX_GPIO59__FUNC_GPIO59 (MTK_PIN_NO(59) | 0) +#define PINMUX_GPIO59__FUNC_I2S1_MCK (MTK_PIN_NO(59) | 1) +#define PINMUX_GPIO59__FUNC_I2S3_MCK (MTK_PIN_NO(59) | 2) +#define PINMUX_GPIO59__FUNC_DBG_MON_A27 (MTK_PIN_NO(59) | 7) + +#define PINMUX_GPIO60__FUNC_GPIO60 (MTK_PIN_NO(60) | 0) +#define PINMUX_GPIO60__FUNC_TDM_RX_LRCK (MTK_PIN_NO(60) | 1) +#define PINMUX_GPIO60__FUNC_ANT_SEL3 (MTK_PIN_NO(60) | 2) +#define PINMUX_GPIO60__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(60) | 5) + +#define PINMUX_GPIO61__FUNC_GPIO61 (MTK_PIN_NO(61) | 0) +#define PINMUX_GPIO61__FUNC_TDM_RX_BCK (MTK_PIN_NO(61) | 1) +#define PINMUX_GPIO61__FUNC_ANT_SEL4 (MTK_PIN_NO(61) | 2) +#define PINMUX_GPIO61__FUNC_SPINOR_CK (MTK_PIN_NO(61) | 4) +#define PINMUX_GPIO61__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(61) | 5) + +#define PINMUX_GPIO62__FUNC_GPIO62 (MTK_PIN_NO(62) | 0) +#define PINMUX_GPIO62__FUNC_TDM_RX_MCK (MTK_PIN_NO(62) | 1) +#define PINMUX_GPIO62__FUNC_ANT_SEL5 (MTK_PIN_NO(62) | 2) +#define PINMUX_GPIO62__FUNC_SPINOR_CS (MTK_PIN_NO(62) | 4) +#define PINMUX_GPIO62__FUNC_CONN_MCU_TDI (MTK_PIN_NO(62) | 5) + +#define PINMUX_GPIO63__FUNC_GPIO63 (MTK_PIN_NO(63) | 0) +#define PINMUX_GPIO63__FUNC_TDM_RX_DATA0 (MTK_PIN_NO(63) | 1) +#define PINMUX_GPIO63__FUNC_ANT_SEL6 (MTK_PIN_NO(63) | 2) +#define PINMUX_GPIO63__FUNC_SPINOR_IO0 (MTK_PIN_NO(63) | 4) +#define PINMUX_GPIO63__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(63) | 5) + +#define PINMUX_GPIO64__FUNC_GPIO64 (MTK_PIN_NO(64) | 0) +#define PINMUX_GPIO64__FUNC_TDM_RX_DATA1 (MTK_PIN_NO(64) | 1) +#define PINMUX_GPIO64__FUNC_ANT_SEL7 (MTK_PIN_NO(64) | 2) +#define PINMUX_GPIO64__FUNC_PWM0 (MTK_PIN_NO(64) | 3) +#define PINMUX_GPIO64__FUNC_SPINOR_IO1 (MTK_PIN_NO(64) | 4) +#define PINMUX_GPIO64__FUNC_CONN_MCU_TCK (MTK_PIN_NO(64) | 5) + +#define PINMUX_GPIO65__FUNC_GPIO65 (MTK_PIN_NO(65) | 0) +#define PINMUX_GPIO65__FUNC_TDM_RX_DATA2 (MTK_PIN_NO(65) | 1) +#define PINMUX_GPIO65__FUNC_UCTS0 (MTK_PIN_NO(65) | 2) +#define PINMUX_GPIO65__FUNC_PWM1 (MTK_PIN_NO(65) | 3) +#define PINMUX_GPIO65__FUNC_SPINOR_IO2 (MTK_PIN_NO(65) | 4) +#define PINMUX_GPIO65__FUNC_CONN_MCU_TDO (MTK_PIN_NO(65) | 5) +#define PINMUX_GPIO65__FUNC_TP_UCTS1_AO (MTK_PIN_NO(65) | 6) +#define PINMUX_GPIO65__FUNC_TP_UCTS2_AO (MTK_PIN_NO(65) | 7) + +#define PINMUX_GPIO66__FUNC_GPIO66 (MTK_PIN_NO(66) | 0) +#define PINMUX_GPIO66__FUNC_TDM_RX_DATA3 (MTK_PIN_NO(66) | 1) +#define PINMUX_GPIO66__FUNC_URTS0 (MTK_PIN_NO(66) | 2) +#define PINMUX_GPIO66__FUNC_PWM2 (MTK_PIN_NO(66) | 3) +#define PINMUX_GPIO66__FUNC_SPINOR_IO3 (MTK_PIN_NO(66) | 4) +#define PINMUX_GPIO66__FUNC_CONN_MCU_TMS (MTK_PIN_NO(66) | 5) +#define PINMUX_GPIO66__FUNC_TP_URTS1_AO (MTK_PIN_NO(66) | 6) +#define PINMUX_GPIO66__FUNC_TP_URTS2_AO (MTK_PIN_NO(66) | 7) + +#define PINMUX_GPIO67__FUNC_GPIO67 (MTK_PIN_NO(67) | 0) +#define PINMUX_GPIO67__FUNC_MSDC0_DSL (MTK_PIN_NO(67) | 1) + +#define PINMUX_GPIO68__FUNC_GPIO68 (MTK_PIN_NO(68) | 0) +#define PINMUX_GPIO68__FUNC_MSDC0_CLK (MTK_PIN_NO(68) | 1) + +#define 
PINMUX_GPIO69__FUNC_GPIO69 (MTK_PIN_NO(69) | 0) +#define PINMUX_GPIO69__FUNC_MSDC0_CMD (MTK_PIN_NO(69) | 1) + +#define PINMUX_GPIO70__FUNC_GPIO70 (MTK_PIN_NO(70) | 0) +#define PINMUX_GPIO70__FUNC_MSDC0_RSTB (MTK_PIN_NO(70) | 1) + +#define PINMUX_GPIO71__FUNC_GPIO71 (MTK_PIN_NO(71) | 0) +#define PINMUX_GPIO71__FUNC_MSDC0_DAT0 (MTK_PIN_NO(71) | 1) + +#define PINMUX_GPIO72__FUNC_GPIO72 (MTK_PIN_NO(72) | 0) +#define PINMUX_GPIO72__FUNC_MSDC0_DAT1 (MTK_PIN_NO(72) | 1) + +#define PINMUX_GPIO73__FUNC_GPIO73 (MTK_PIN_NO(73) | 0) +#define PINMUX_GPIO73__FUNC_MSDC0_DAT2 (MTK_PIN_NO(73) | 1) + +#define PINMUX_GPIO74__FUNC_GPIO74 (MTK_PIN_NO(74) | 0) +#define PINMUX_GPIO74__FUNC_MSDC0_DAT3 (MTK_PIN_NO(74) | 1) + +#define PINMUX_GPIO75__FUNC_GPIO75 (MTK_PIN_NO(75) | 0) +#define PINMUX_GPIO75__FUNC_MSDC0_DAT4 (MTK_PIN_NO(75) | 1) + +#define PINMUX_GPIO76__FUNC_GPIO76 (MTK_PIN_NO(76) | 0) +#define PINMUX_GPIO76__FUNC_MSDC0_DAT5 (MTK_PIN_NO(76) | 1) + +#define PINMUX_GPIO77__FUNC_GPIO77 (MTK_PIN_NO(77) | 0) +#define PINMUX_GPIO77__FUNC_MSDC0_DAT6 (MTK_PIN_NO(77) | 1) + +#define PINMUX_GPIO78__FUNC_GPIO78 (MTK_PIN_NO(78) | 0) +#define PINMUX_GPIO78__FUNC_MSDC0_DAT7 (MTK_PIN_NO(78) | 1) + +#define PINMUX_GPIO79__FUNC_GPIO79 (MTK_PIN_NO(79) | 0) +#define PINMUX_GPIO79__FUNC_KPCOL0 (MTK_PIN_NO(79) | 1) + +#define PINMUX_GPIO80__FUNC_GPIO80 (MTK_PIN_NO(80) | 0) +#define PINMUX_GPIO80__FUNC_KPCOL1 (MTK_PIN_NO(80) | 1) +#define PINMUX_GPIO80__FUNC_GPS_L1_ELNA_EN (MTK_PIN_NO(80) | 2) +#define PINMUX_GPIO80__FUNC_PWM0 (MTK_PIN_NO(80) | 3) +#define PINMUX_GPIO80__FUNC_CLKM0 (MTK_PIN_NO(80) | 4) + +#define PINMUX_GPIO81__FUNC_GPIO81 (MTK_PIN_NO(81) | 0) +#define PINMUX_GPIO81__FUNC_KPROW0 (MTK_PIN_NO(81) | 1) +#define PINMUX_GPIO81__FUNC_PWM1 (MTK_PIN_NO(81) | 3) +#define PINMUX_GPIO81__FUNC_CLKM1 (MTK_PIN_NO(81) | 4) + +#define PINMUX_GPIO82__FUNC_GPIO82 (MTK_PIN_NO(82) | 0) +#define PINMUX_GPIO82__FUNC_KPROW1 (MTK_PIN_NO(82) | 1) +#define PINMUX_GPIO82__FUNC_PWM2 (MTK_PIN_NO(82) | 3) +#define PINMUX_GPIO82__FUNC_CLKM2 (MTK_PIN_NO(82) | 4) + +#define PINMUX_GPIO83__FUNC_GPIO83 (MTK_PIN_NO(83) | 0) +#define PINMUX_GPIO83__FUNC_AP_GOOD (MTK_PIN_NO(83) | 1) +#define PINMUX_GPIO83__FUNC_GPS_PPS (MTK_PIN_NO(83) | 2) +#define PINMUX_GPIO83__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(83) | 4) +#define PINMUX_GPIO83__FUNC_DBG_MON_A28 (MTK_PIN_NO(83) | 7) + +#define PINMUX_GPIO84__FUNC_GPIO84 (MTK_PIN_NO(84) | 0) +#define PINMUX_GPIO84__FUNC_MSDC1_CLK (MTK_PIN_NO(84) | 1) +#define PINMUX_GPIO84__FUNC_ADSP_JTAG_TCK (MTK_PIN_NO(84) | 2) +#define PINMUX_GPIO84__FUNC_UDI_TCK (MTK_PIN_NO(84) | 4) +#define PINMUX_GPIO84__FUNC_CONN_DSP_JCK (MTK_PIN_NO(84) | 5) +#define PINMUX_GPIO84__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(84) | 6) +#define PINMUX_GPIO84__FUNC_DFD_TCK_XI (MTK_PIN_NO(84) | 7) + +#define PINMUX_GPIO85__FUNC_GPIO85 (MTK_PIN_NO(85) | 0) +#define PINMUX_GPIO85__FUNC_MSDC1_CMD (MTK_PIN_NO(85) | 1) +#define PINMUX_GPIO85__FUNC_ADSP_JTAG_TMS (MTK_PIN_NO(85) | 2) +#define PINMUX_GPIO85__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(85) | 3) +#define PINMUX_GPIO85__FUNC_UDI_TMS (MTK_PIN_NO(85) | 4) +#define PINMUX_GPIO85__FUNC_CONN_DSP_JMS (MTK_PIN_NO(85) | 5) +#define PINMUX_GPIO85__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(85) | 6) +#define PINMUX_GPIO85__FUNC_DFD_TMS (MTK_PIN_NO(85) | 7) + +#define PINMUX_GPIO86__FUNC_GPIO86 (MTK_PIN_NO(86) | 0) +#define PINMUX_GPIO86__FUNC_MSDC1_DAT0 (MTK_PIN_NO(86) | 1) +#define PINMUX_GPIO86__FUNC_ADSP_JTAG_TDI (MTK_PIN_NO(86) | 2) +#define PINMUX_GPIO86__FUNC_UDI_TDI (MTK_PIN_NO(86) | 4) +#define 
PINMUX_GPIO86__FUNC_CONN_DSP_JDI (MTK_PIN_NO(86) | 5) +#define PINMUX_GPIO86__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(86) | 6) +#define PINMUX_GPIO86__FUNC_DFD_TDI (MTK_PIN_NO(86) | 7) + +#define PINMUX_GPIO87__FUNC_GPIO87 (MTK_PIN_NO(87) | 0) +#define PINMUX_GPIO87__FUNC_MSDC1_DAT1 (MTK_PIN_NO(87) | 1) +#define PINMUX_GPIO87__FUNC_ADSP_JTAG_TDO (MTK_PIN_NO(87) | 2) +#define PINMUX_GPIO87__FUNC_UDI_TDO (MTK_PIN_NO(87) | 4) +#define PINMUX_GPIO87__FUNC_CONN_DSP_JDO (MTK_PIN_NO(87) | 5) +#define PINMUX_GPIO87__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(87) | 6) +#define PINMUX_GPIO87__FUNC_DFD_TDO (MTK_PIN_NO(87) | 7) + +#define PINMUX_GPIO88__FUNC_GPIO88 (MTK_PIN_NO(88) | 0) +#define PINMUX_GPIO88__FUNC_MSDC1_DAT2 (MTK_PIN_NO(88) | 1) +#define PINMUX_GPIO88__FUNC_ADSP_JTAG_TRSTN (MTK_PIN_NO(88) | 2) +#define PINMUX_GPIO88__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(88) | 3) +#define PINMUX_GPIO88__FUNC_UDI_NTRST (MTK_PIN_NO(88) | 4) +#define PINMUX_GPIO88__FUNC_CONN_WIFI_TXD (MTK_PIN_NO(88) | 5) +#define PINMUX_GPIO88__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(88) | 6) + +#define PINMUX_GPIO89__FUNC_GPIO89 (MTK_PIN_NO(89) | 0) +#define PINMUX_GPIO89__FUNC_MSDC1_DAT3 (MTK_PIN_NO(89) | 1) +#define PINMUX_GPIO89__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(89) | 5) + +#define PINMUX_GPIO90__FUNC_GPIO90 (MTK_PIN_NO(90) | 0) +#define PINMUX_GPIO90__FUNC_IDDIG_P0 (MTK_PIN_NO(90) | 1) +#define PINMUX_GPIO90__FUNC_PGD_HV_HSC_PWR4 (MTK_PIN_NO(90) | 4) +#define PINMUX_GPIO90__FUNC_GDU_SUM_TROOP2_2 (MTK_PIN_NO(90) | 5) + +#define PINMUX_GPIO91__FUNC_GPIO91 (MTK_PIN_NO(91) | 0) +#define PINMUX_GPIO91__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(91) | 1) +#define PINMUX_GPIO91__FUNC_PGD_HV_HSC_PWR5 (MTK_PIN_NO(91) | 4) +#define PINMUX_GPIO91__FUNC_GDU_TROOPS_DET0 (MTK_PIN_NO(91) | 5) + +#define PINMUX_GPIO92__FUNC_GPIO92 (MTK_PIN_NO(92) | 0) +#define PINMUX_GPIO92__FUNC_VBUS_VALID_P0 (MTK_PIN_NO(92) | 1) +#define PINMUX_GPIO92__FUNC_PGD_DA_EFUSE_RDY (MTK_PIN_NO(92) | 4) +#define PINMUX_GPIO92__FUNC_GDU_TROOPS_DET1 (MTK_PIN_NO(92) | 5) + +#define PINMUX_GPIO93__FUNC_GPIO93 (MTK_PIN_NO(93) | 0) +#define PINMUX_GPIO93__FUNC_IDDIG_P1 (MTK_PIN_NO(93) | 1) +#define PINMUX_GPIO93__FUNC_PWM0 (MTK_PIN_NO(93) | 2) +#define PINMUX_GPIO93__FUNC_CLKM0 (MTK_PIN_NO(93) | 3) +#define PINMUX_GPIO93__FUNC_PGD_DA_EFUSE_RDY_PRE (MTK_PIN_NO(93) | 4) +#define PINMUX_GPIO93__FUNC_GDU_TROOPS_DET2 (MTK_PIN_NO(93) | 5) + +#define PINMUX_GPIO94__FUNC_GPIO94 (MTK_PIN_NO(94) | 0) +#define PINMUX_GPIO94__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(94) | 1) +#define PINMUX_GPIO94__FUNC_PWM1 (MTK_PIN_NO(94) | 2) +#define PINMUX_GPIO94__FUNC_CLKM1 (MTK_PIN_NO(94) | 3) +#define PINMUX_GPIO94__FUNC_PGD_DA_PWRGD_RESET (MTK_PIN_NO(94) | 4) + +#define PINMUX_GPIO95__FUNC_GPIO95 (MTK_PIN_NO(95) | 0) +#define PINMUX_GPIO95__FUNC_VBUS_VALID_P1 (MTK_PIN_NO(95) | 1) +#define PINMUX_GPIO95__FUNC_PWM2 (MTK_PIN_NO(95) | 2) +#define PINMUX_GPIO95__FUNC_CLKM2 (MTK_PIN_NO(95) | 3) +#define PINMUX_GPIO95__FUNC_PGD_DA_PWRGD_ENB (MTK_PIN_NO(95) | 4) + +#define PINMUX_GPIO96__FUNC_GPIO96 (MTK_PIN_NO(96) | 0) +#define PINMUX_GPIO96__FUNC_DSI_TE (MTK_PIN_NO(96) | 1) +#define PINMUX_GPIO96__FUNC_DBG_MON_A29 (MTK_PIN_NO(96) | 7) + +#define PINMUX_GPIO97__FUNC_GPIO97 (MTK_PIN_NO(97) | 0) +#define PINMUX_GPIO97__FUNC_DISP_PWM (MTK_PIN_NO(97) | 1) +#define PINMUX_GPIO97__FUNC_DBG_MON_A30 (MTK_PIN_NO(97) | 7) + +#define PINMUX_GPIO98__FUNC_GPIO98 (MTK_PIN_NO(98) | 0) +#define PINMUX_GPIO98__FUNC_LCM_RST (MTK_PIN_NO(98) | 1) + +#define PINMUX_GPIO99__FUNC_GPIO99 (MTK_PIN_NO(99) | 0) +#define PINMUX_GPIO99__FUNC_DPI_PCLK (MTK_PIN_NO(99) 
| 1) +#define PINMUX_GPIO99__FUNC_GPS_L1_ELNA_EN (MTK_PIN_NO(99) | 2) +#define PINMUX_GPIO99__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(99) | 3) +#define PINMUX_GPIO99__FUNC_ANT_SEL0 (MTK_PIN_NO(99) | 5) +#define PINMUX_GPIO99__FUNC_TP_GPIO0_AO (MTK_PIN_NO(99) | 6) +#define PINMUX_GPIO99__FUNC_PGD_LV_LSC_PWR0 (MTK_PIN_NO(99) | 7) + +#define PINMUX_GPIO100__FUNC_GPIO100 (MTK_PIN_NO(100) | 0) +#define PINMUX_GPIO100__FUNC_DPI_VSYNC (MTK_PIN_NO(100) | 1) +#define PINMUX_GPIO100__FUNC_KPCOL2 (MTK_PIN_NO(100) | 2) +#define PINMUX_GPIO100__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(100) | 3) +#define PINMUX_GPIO100__FUNC_ANT_SEL1 (MTK_PIN_NO(100) | 5) +#define PINMUX_GPIO100__FUNC_TP_GPIO1_AO (MTK_PIN_NO(100) | 6) +#define PINMUX_GPIO100__FUNC_PGD_LV_LSC_PWR1 (MTK_PIN_NO(100) | 7) + +#define PINMUX_GPIO101__FUNC_GPIO101 (MTK_PIN_NO(101) | 0) +#define PINMUX_GPIO101__FUNC_DPI_HSYNC (MTK_PIN_NO(101) | 1) +#define PINMUX_GPIO101__FUNC_KPROW2 (MTK_PIN_NO(101) | 2) +#define PINMUX_GPIO101__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(101) | 3) +#define PINMUX_GPIO101__FUNC_ANT_SEL2 (MTK_PIN_NO(101) | 5) +#define PINMUX_GPIO101__FUNC_TP_GPIO2_AO (MTK_PIN_NO(101) | 6) +#define PINMUX_GPIO101__FUNC_PGD_LV_LSC_PWR2 (MTK_PIN_NO(101) | 7) + +#define PINMUX_GPIO102__FUNC_GPIO102 (MTK_PIN_NO(102) | 0) +#define PINMUX_GPIO102__FUNC_DPI_DE (MTK_PIN_NO(102) | 1) +#define PINMUX_GPIO102__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(102) | 3) +#define PINMUX_GPIO102__FUNC_ANT_SEL3 (MTK_PIN_NO(102) | 5) +#define PINMUX_GPIO102__FUNC_TP_GPIO3_AO (MTK_PIN_NO(102) | 6) +#define PINMUX_GPIO102__FUNC_PGD_LV_LSC_PWR3 (MTK_PIN_NO(102) | 7) + +#define PINMUX_GPIO103__FUNC_GPIO103 (MTK_PIN_NO(103) | 0) +#define PINMUX_GPIO103__FUNC_DPI_DATA0 (MTK_PIN_NO(103) | 1) +#define PINMUX_GPIO103__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(103) | 3) +#define PINMUX_GPIO103__FUNC_CLKM0 (MTK_PIN_NO(103) | 4) +#define PINMUX_GPIO103__FUNC_ANT_SEL4 (MTK_PIN_NO(103) | 5) +#define PINMUX_GPIO103__FUNC_TP_GPIO4_AO (MTK_PIN_NO(103) | 6) +#define PINMUX_GPIO103__FUNC_PGD_LV_LSC_PWR4 (MTK_PIN_NO(103) | 7) + +#define PINMUX_GPIO104__FUNC_GPIO104 (MTK_PIN_NO(104) | 0) +#define PINMUX_GPIO104__FUNC_DPI_DATA1 (MTK_PIN_NO(104) | 1) +#define PINMUX_GPIO104__FUNC_GPS_PPS (MTK_PIN_NO(104) | 2) +#define PINMUX_GPIO104__FUNC_UCTS2 (MTK_PIN_NO(104) | 3) +#define PINMUX_GPIO104__FUNC_CLKM1 (MTK_PIN_NO(104) | 4) +#define PINMUX_GPIO104__FUNC_ANT_SEL5 (MTK_PIN_NO(104) | 5) +#define PINMUX_GPIO104__FUNC_TP_GPIO5_AO (MTK_PIN_NO(104) | 6) +#define PINMUX_GPIO104__FUNC_PGD_LV_LSC_PWR5 (MTK_PIN_NO(104) | 7) + +#define PINMUX_GPIO105__FUNC_GPIO105 (MTK_PIN_NO(105) | 0) +#define PINMUX_GPIO105__FUNC_DPI_DATA2 (MTK_PIN_NO(105) | 1) +#define PINMUX_GPIO105__FUNC_CONN_TCXOENA_REQ (MTK_PIN_NO(105) | 2) +#define PINMUX_GPIO105__FUNC_URTS2 (MTK_PIN_NO(105) | 3) +#define PINMUX_GPIO105__FUNC_CLKM2 (MTK_PIN_NO(105) | 4) +#define PINMUX_GPIO105__FUNC_ANT_SEL6 (MTK_PIN_NO(105) | 5) +#define PINMUX_GPIO105__FUNC_TP_GPIO6_AO (MTK_PIN_NO(105) | 6) +#define PINMUX_GPIO105__FUNC_PGD_LV_HSC_PWR0 (MTK_PIN_NO(105) | 7) + +#define PINMUX_GPIO106__FUNC_GPIO106 (MTK_PIN_NO(106) | 0) +#define PINMUX_GPIO106__FUNC_DPI_DATA3 (MTK_PIN_NO(106) | 1) +#define PINMUX_GPIO106__FUNC_TP_UTXD1_AO (MTK_PIN_NO(106) | 2) +#define PINMUX_GPIO106__FUNC_UTXD2 (MTK_PIN_NO(106) | 3) +#define PINMUX_GPIO106__FUNC_PWM0 (MTK_PIN_NO(106) | 4) +#define PINMUX_GPIO106__FUNC_ANT_SEL7 (MTK_PIN_NO(106) | 5) +#define PINMUX_GPIO106__FUNC_TP_GPIO7_AO (MTK_PIN_NO(106) | 6) +#define PINMUX_GPIO106__FUNC_PGD_LV_HSC_PWR1 (MTK_PIN_NO(106) | 7) + +#define 
PINMUX_GPIO107__FUNC_GPIO107 (MTK_PIN_NO(107) | 0) +#define PINMUX_GPIO107__FUNC_DPI_DATA4 (MTK_PIN_NO(107) | 1) +#define PINMUX_GPIO107__FUNC_TP_URXD1_AO (MTK_PIN_NO(107) | 2) +#define PINMUX_GPIO107__FUNC_URXD2 (MTK_PIN_NO(107) | 3) +#define PINMUX_GPIO107__FUNC_PWM1 (MTK_PIN_NO(107) | 4) +#define PINMUX_GPIO107__FUNC_GDU_SUM_TROOP0_0 (MTK_PIN_NO(107) | 6) +#define PINMUX_GPIO107__FUNC_PGD_LV_HSC_PWR2 (MTK_PIN_NO(107) | 7) + +#define PINMUX_GPIO108__FUNC_GPIO108 (MTK_PIN_NO(108) | 0) +#define PINMUX_GPIO108__FUNC_DPI_DATA5 (MTK_PIN_NO(108) | 1) +#define PINMUX_GPIO108__FUNC_TP_UCTS1_AO (MTK_PIN_NO(108) | 2) +#define PINMUX_GPIO108__FUNC_UCTS0 (MTK_PIN_NO(108) | 3) +#define PINMUX_GPIO108__FUNC_PWM2 (MTK_PIN_NO(108) | 4) +#define PINMUX_GPIO108__FUNC_GDU_SUM_TROOP0_1 (MTK_PIN_NO(108) | 6) +#define PINMUX_GPIO108__FUNC_PGD_LV_HSC_PWR3 (MTK_PIN_NO(108) | 7) + +#define PINMUX_GPIO109__FUNC_GPIO109 (MTK_PIN_NO(109) | 0) +#define PINMUX_GPIO109__FUNC_DPI_DATA6 (MTK_PIN_NO(109) | 1) +#define PINMUX_GPIO109__FUNC_TP_URTS1_AO (MTK_PIN_NO(109) | 2) +#define PINMUX_GPIO109__FUNC_URTS0 (MTK_PIN_NO(109) | 3) +#define PINMUX_GPIO109__FUNC_I2S0_DI (MTK_PIN_NO(109) | 4) +#define PINMUX_GPIO109__FUNC_I2S2_DI (MTK_PIN_NO(109) | 5) +#define PINMUX_GPIO109__FUNC_GDU_SUM_TROOP0_2 (MTK_PIN_NO(109) | 6) +#define PINMUX_GPIO109__FUNC_PGD_LV_HSC_PWR4 (MTK_PIN_NO(109) | 7) + +#define PINMUX_GPIO110__FUNC_GPIO110 (MTK_PIN_NO(110) | 0) +#define PINMUX_GPIO110__FUNC_DPI_DATA7 (MTK_PIN_NO(110) | 1) +#define PINMUX_GPIO110__FUNC_TP_UCTS2_AO (MTK_PIN_NO(110) | 2) +#define PINMUX_GPIO110__FUNC_UCTS1 (MTK_PIN_NO(110) | 3) +#define PINMUX_GPIO110__FUNC_I2S3_BCK (MTK_PIN_NO(110) | 4) +#define PINMUX_GPIO110__FUNC_I2S1_BCK (MTK_PIN_NO(110) | 5) +#define PINMUX_GPIO110__FUNC_GDU_SUM_TROOP1_0 (MTK_PIN_NO(110) | 6) +#define PINMUX_GPIO110__FUNC_PGD_LV_HSC_PWR5 (MTK_PIN_NO(110) | 7) + +#define PINMUX_GPIO111__FUNC_GPIO111 (MTK_PIN_NO(111) | 0) +#define PINMUX_GPIO111__FUNC_DPI_DATA8 (MTK_PIN_NO(111) | 1) +#define PINMUX_GPIO111__FUNC_TP_URTS2_AO (MTK_PIN_NO(111) | 2) +#define PINMUX_GPIO111__FUNC_URTS1 (MTK_PIN_NO(111) | 3) +#define PINMUX_GPIO111__FUNC_I2S3_MCK (MTK_PIN_NO(111) | 4) +#define PINMUX_GPIO111__FUNC_I2S1_MCK (MTK_PIN_NO(111) | 5) +#define PINMUX_GPIO111__FUNC_GDU_SUM_TROOP1_1 (MTK_PIN_NO(111) | 6) +#define PINMUX_GPIO111__FUNC_PGD_HV_HSC_PWR0 (MTK_PIN_NO(111) | 7) + +#define PINMUX_GPIO112__FUNC_GPIO112 (MTK_PIN_NO(112) | 0) +#define PINMUX_GPIO112__FUNC_DPI_DATA9 (MTK_PIN_NO(112) | 1) +#define PINMUX_GPIO112__FUNC_TP_URXD2_AO (MTK_PIN_NO(112) | 2) +#define PINMUX_GPIO112__FUNC_URXD1 (MTK_PIN_NO(112) | 3) +#define PINMUX_GPIO112__FUNC_I2S3_LRCK (MTK_PIN_NO(112) | 4) +#define PINMUX_GPIO112__FUNC_I2S1_LRCK (MTK_PIN_NO(112) | 5) +#define PINMUX_GPIO112__FUNC_GDU_SUM_TROOP1_2 (MTK_PIN_NO(112) | 6) +#define PINMUX_GPIO112__FUNC_PGD_HV_HSC_PWR1 (MTK_PIN_NO(112) | 7) + +#define PINMUX_GPIO113__FUNC_GPIO113 (MTK_PIN_NO(113) | 0) +#define PINMUX_GPIO113__FUNC_DPI_DATA10 (MTK_PIN_NO(113) | 1) +#define PINMUX_GPIO113__FUNC_TP_UTXD2_AO (MTK_PIN_NO(113) | 2) +#define PINMUX_GPIO113__FUNC_UTXD1 (MTK_PIN_NO(113) | 3) +#define PINMUX_GPIO113__FUNC_I2S3_DO (MTK_PIN_NO(113) | 4) +#define PINMUX_GPIO113__FUNC_I2S1_DO (MTK_PIN_NO(113) | 5) +#define PINMUX_GPIO113__FUNC_GDU_SUM_TROOP2_0 (MTK_PIN_NO(113) | 6) +#define PINMUX_GPIO113__FUNC_PGD_HV_HSC_PWR2 (MTK_PIN_NO(113) | 7) + +#define PINMUX_GPIO114__FUNC_GPIO114 (MTK_PIN_NO(114) | 0) +#define PINMUX_GPIO114__FUNC_DPI_DATA11 (MTK_PIN_NO(114) | 1) +#define 
PINMUX_GPIO114__FUNC_GDU_SUM_TROOP2_1 (MTK_PIN_NO(114) | 6) +#define PINMUX_GPIO114__FUNC_PGD_HV_HSC_PWR3 (MTK_PIN_NO(114) | 7) + +#define PINMUX_GPIO115__FUNC_GPIO115 (MTK_PIN_NO(115) | 0) +#define PINMUX_GPIO115__FUNC_PCM_CLK (MTK_PIN_NO(115) | 1) +#define PINMUX_GPIO115__FUNC_I2S0_BCK (MTK_PIN_NO(115) | 2) +#define PINMUX_GPIO115__FUNC_I2S2_BCK (MTK_PIN_NO(115) | 3) + +#define PINMUX_GPIO116__FUNC_GPIO116 (MTK_PIN_NO(116) | 0) +#define PINMUX_GPIO116__FUNC_PCM_SYNC (MTK_PIN_NO(116) | 1) +#define PINMUX_GPIO116__FUNC_I2S0_LRCK (MTK_PIN_NO(116) | 2) +#define PINMUX_GPIO116__FUNC_I2S2_LRCK (MTK_PIN_NO(116) | 3) + +#define PINMUX_GPIO117__FUNC_GPIO117 (MTK_PIN_NO(117) | 0) +#define PINMUX_GPIO117__FUNC_PCM_DI (MTK_PIN_NO(117) | 1) +#define PINMUX_GPIO117__FUNC_I2S0_DI (MTK_PIN_NO(117) | 2) +#define PINMUX_GPIO117__FUNC_I2S2_DI (MTK_PIN_NO(117) | 3) + +#define PINMUX_GPIO118__FUNC_GPIO118 (MTK_PIN_NO(118) | 0) +#define PINMUX_GPIO118__FUNC_PCM_DO (MTK_PIN_NO(118) | 1) +#define PINMUX_GPIO118__FUNC_I2S0_MCK (MTK_PIN_NO(118) | 2) +#define PINMUX_GPIO118__FUNC_I2S2_MCK (MTK_PIN_NO(118) | 3) +#define PINMUX_GPIO118__FUNC_I2S3_DO (MTK_PIN_NO(118) | 4) +#define PINMUX_GPIO118__FUNC_I2S1_DO (MTK_PIN_NO(118) | 5) + +#define PINMUX_GPIO119__FUNC_GPIO119 (MTK_PIN_NO(119) | 0) +#define PINMUX_GPIO119__FUNC_JTMS_SEL1 (MTK_PIN_NO(119) | 1) +#define PINMUX_GPIO119__FUNC_UDI_TMS (MTK_PIN_NO(119) | 2) +#define PINMUX_GPIO119__FUNC_DFD_TMS (MTK_PIN_NO(119) | 3) +#define PINMUX_GPIO119__FUNC_SPM_JTAG_TMS (MTK_PIN_NO(119) | 4) +#define PINMUX_GPIO119__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(119) | 5) +#define PINMUX_GPIO119__FUNC_ADSP_JTAG_TMS (MTK_PIN_NO(119) | 6) + +#define PINMUX_GPIO120__FUNC_GPIO120 (MTK_PIN_NO(120) | 0) +#define PINMUX_GPIO120__FUNC_JTCK_SEL1 (MTK_PIN_NO(120) | 1) +#define PINMUX_GPIO120__FUNC_UDI_TCK (MTK_PIN_NO(120) | 2) +#define PINMUX_GPIO120__FUNC_DFD_TCK_XI (MTK_PIN_NO(120) | 3) +#define PINMUX_GPIO120__FUNC_SPM_JTAG_TCK (MTK_PIN_NO(120) | 4) +#define PINMUX_GPIO120__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(120) | 5) +#define PINMUX_GPIO120__FUNC_ADSP_JTAG_TCK (MTK_PIN_NO(120) | 6) + +#define PINMUX_GPIO121__FUNC_GPIO121 (MTK_PIN_NO(121) | 0) +#define PINMUX_GPIO121__FUNC_JTDI_SEL1 (MTK_PIN_NO(121) | 1) +#define PINMUX_GPIO121__FUNC_UDI_TDI (MTK_PIN_NO(121) | 2) +#define PINMUX_GPIO121__FUNC_DFD_TDI (MTK_PIN_NO(121) | 3) +#define PINMUX_GPIO121__FUNC_SPM_JTAG_TDI (MTK_PIN_NO(121) | 4) +#define PINMUX_GPIO121__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(121) | 5) +#define PINMUX_GPIO121__FUNC_ADSP_JTAG_TDI (MTK_PIN_NO(121) | 6) + +#define PINMUX_GPIO122__FUNC_GPIO122 (MTK_PIN_NO(122) | 0) +#define PINMUX_GPIO122__FUNC_JTDO_SEL1 (MTK_PIN_NO(122) | 1) +#define PINMUX_GPIO122__FUNC_UDI_TDO (MTK_PIN_NO(122) | 2) +#define PINMUX_GPIO122__FUNC_DFD_TDO (MTK_PIN_NO(122) | 3) +#define PINMUX_GPIO122__FUNC_SPM_JTAG_TDO (MTK_PIN_NO(122) | 4) +#define PINMUX_GPIO122__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(122) | 5) +#define PINMUX_GPIO122__FUNC_ADSP_JTAG_TDO (MTK_PIN_NO(122) | 6) + +#define PINMUX_GPIO123__FUNC_GPIO123 (MTK_PIN_NO(123) | 0) +#define PINMUX_GPIO123__FUNC_JTRSTN_SEL1 (MTK_PIN_NO(123) | 1) +#define PINMUX_GPIO123__FUNC_UDI_NTRST (MTK_PIN_NO(123) | 2) +#define PINMUX_GPIO123__FUNC_SPM_JTAG_TRSTN (MTK_PIN_NO(123) | 4) +#define PINMUX_GPIO123__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(123) | 5) +#define PINMUX_GPIO123__FUNC_ADSP_JTAG_TRSTN (MTK_PIN_NO(123) | 6) + +#define PINMUX_GPIO124__FUNC_GPIO124 (MTK_PIN_NO(124) | 0) +#define PINMUX_GPIO124__FUNC_CMMCLK0 (MTK_PIN_NO(124) | 1) +#define PINMUX_GPIO124__FUNC_CLKM0 (MTK_PIN_NO(124) | 
2) +#define PINMUX_GPIO124__FUNC_PWM0 (MTK_PIN_NO(124) | 3) + +#define PINMUX_GPIO125__FUNC_GPIO125 (MTK_PIN_NO(125) | 0) +#define PINMUX_GPIO125__FUNC_CMMCLK1 (MTK_PIN_NO(125) | 1) +#define PINMUX_GPIO125__FUNC_CLKM1 (MTK_PIN_NO(125) | 2) +#define PINMUX_GPIO125__FUNC_PWM1 (MTK_PIN_NO(125) | 3) +#define PINMUX_GPIO125__FUNC_DBG_MON_B0 (MTK_PIN_NO(125) | 7) + +#define PINMUX_GPIO126__FUNC_GPIO126 (MTK_PIN_NO(126) | 0) +#define PINMUX_GPIO126__FUNC_CMMCLK2 (MTK_PIN_NO(126) | 1) +#define PINMUX_GPIO126__FUNC_CLKM2 (MTK_PIN_NO(126) | 2) +#define PINMUX_GPIO126__FUNC_PWM2 (MTK_PIN_NO(126) | 3) +#define PINMUX_GPIO126__FUNC_DBG_MON_B1 (MTK_PIN_NO(126) | 7) + +#define PINMUX_GPIO127__FUNC_GPIO127 (MTK_PIN_NO(127) | 0) +#define PINMUX_GPIO127__FUNC_SCL0 (MTK_PIN_NO(127) | 1) +#define PINMUX_GPIO127__FUNC_SCP_SCL0 (MTK_PIN_NO(127) | 4) +#define PINMUX_GPIO127__FUNC_SCP_SCL1 (MTK_PIN_NO(127) | 5) + +#define PINMUX_GPIO128__FUNC_GPIO128 (MTK_PIN_NO(128) | 0) +#define PINMUX_GPIO128__FUNC_SDA0 (MTK_PIN_NO(128) | 1) +#define PINMUX_GPIO128__FUNC_SCP_SDA0 (MTK_PIN_NO(128) | 4) +#define PINMUX_GPIO128__FUNC_SCP_SDA1 (MTK_PIN_NO(128) | 5) + +#define PINMUX_GPIO129__FUNC_GPIO129 (MTK_PIN_NO(129) | 0) +#define PINMUX_GPIO129__FUNC_SCL1 (MTK_PIN_NO(129) | 1) +#define PINMUX_GPIO129__FUNC_SCP_SCL0 (MTK_PIN_NO(129) | 4) +#define PINMUX_GPIO129__FUNC_SCP_SCL1 (MTK_PIN_NO(129) | 5) +#define PINMUX_GPIO129__FUNC_DBG_MON_B4 (MTK_PIN_NO(129) | 7) + +#define PINMUX_GPIO130__FUNC_GPIO130 (MTK_PIN_NO(130) | 0) +#define PINMUX_GPIO130__FUNC_SDA1 (MTK_PIN_NO(130) | 1) +#define PINMUX_GPIO130__FUNC_SCP_SDA0 (MTK_PIN_NO(130) | 4) +#define PINMUX_GPIO130__FUNC_SCP_SDA1 (MTK_PIN_NO(130) | 5) +#define PINMUX_GPIO130__FUNC_DBG_MON_B5 (MTK_PIN_NO(130) | 7) + +#define PINMUX_GPIO131__FUNC_GPIO131 (MTK_PIN_NO(131) | 0) +#define PINMUX_GPIO131__FUNC_SCL2 (MTK_PIN_NO(131) | 1) +#define PINMUX_GPIO131__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(131) | 2) +#define PINMUX_GPIO131__FUNC_CONN_UART0_TXD (MTK_PIN_NO(131) | 3) +#define PINMUX_GPIO131__FUNC_SCP_SCL0 (MTK_PIN_NO(131) | 4) +#define PINMUX_GPIO131__FUNC_SCP_SCL1 (MTK_PIN_NO(131) | 5) +#define PINMUX_GPIO131__FUNC_DBG_MON_B6 (MTK_PIN_NO(131) | 7) + +#define PINMUX_GPIO132__FUNC_GPIO132 (MTK_PIN_NO(132) | 0) +#define PINMUX_GPIO132__FUNC_SDA2 (MTK_PIN_NO(132) | 1) +#define PINMUX_GPIO132__FUNC_SSPM_URXD_AO (MTK_PIN_NO(132) | 2) +#define PINMUX_GPIO132__FUNC_CONN_UART0_RXD (MTK_PIN_NO(132) | 3) +#define PINMUX_GPIO132__FUNC_SCP_SDA0 (MTK_PIN_NO(132) | 4) +#define PINMUX_GPIO132__FUNC_SCP_SDA1 (MTK_PIN_NO(132) | 5) +#define PINMUX_GPIO132__FUNC_DBG_MON_B7 (MTK_PIN_NO(132) | 7) + +#define PINMUX_GPIO133__FUNC_GPIO133 (MTK_PIN_NO(133) | 0) +#define PINMUX_GPIO133__FUNC_SCL3 (MTK_PIN_NO(133) | 1) +#define PINMUX_GPIO133__FUNC_SCP_SCL0 (MTK_PIN_NO(133) | 4) +#define PINMUX_GPIO133__FUNC_SCP_SCL1 (MTK_PIN_NO(133) | 5) +#define PINMUX_GPIO133__FUNC_DBG_MON_B8 (MTK_PIN_NO(133) | 7) + +#define PINMUX_GPIO134__FUNC_GPIO134 (MTK_PIN_NO(134) | 0) +#define PINMUX_GPIO134__FUNC_SDA3 (MTK_PIN_NO(134) | 1) +#define PINMUX_GPIO134__FUNC_GPS_PPS (MTK_PIN_NO(134) | 3) +#define PINMUX_GPIO134__FUNC_SCP_SDA0 (MTK_PIN_NO(134) | 4) +#define PINMUX_GPIO134__FUNC_SCP_SDA1 (MTK_PIN_NO(134) | 5) +#define PINMUX_GPIO134__FUNC_DBG_MON_B9 (MTK_PIN_NO(134) | 7) + +#define PINMUX_GPIO135__FUNC_GPIO135 (MTK_PIN_NO(135) | 0) +#define PINMUX_GPIO135__FUNC_SCL4 (MTK_PIN_NO(135) | 1) +#define PINMUX_GPIO135__FUNC_TP_UTXD1_AO (MTK_PIN_NO(135) | 2) +#define PINMUX_GPIO135__FUNC_UTXD1 (MTK_PIN_NO(135) | 3) +#define 
PINMUX_GPIO135__FUNC_SCP_SCL0 (MTK_PIN_NO(135) | 4) +#define PINMUX_GPIO135__FUNC_SCP_SCL1 (MTK_PIN_NO(135) | 5) +#define PINMUX_GPIO135__FUNC_DBG_MON_B10 (MTK_PIN_NO(135) | 7) + +#define PINMUX_GPIO136__FUNC_GPIO136 (MTK_PIN_NO(136) | 0) +#define PINMUX_GPIO136__FUNC_SDA4 (MTK_PIN_NO(136) | 1) +#define PINMUX_GPIO136__FUNC_TP_URXD1_AO (MTK_PIN_NO(136) | 2) +#define PINMUX_GPIO136__FUNC_URXD1 (MTK_PIN_NO(136) | 3) +#define PINMUX_GPIO136__FUNC_SCP_SDA0 (MTK_PIN_NO(136) | 4) +#define PINMUX_GPIO136__FUNC_SCP_SDA1 (MTK_PIN_NO(136) | 5) +#define PINMUX_GPIO136__FUNC_DBG_MON_B11 (MTK_PIN_NO(136) | 7) + +#define PINMUX_GPIO137__FUNC_GPIO137 (MTK_PIN_NO(137) | 0) +#define PINMUX_GPIO137__FUNC_SCL5 (MTK_PIN_NO(137) | 1) +#define PINMUX_GPIO137__FUNC_UTXD2 (MTK_PIN_NO(137) | 2) +#define PINMUX_GPIO137__FUNC_UCTS1 (MTK_PIN_NO(137) | 3) +#define PINMUX_GPIO137__FUNC_SCP_SCL0 (MTK_PIN_NO(137) | 4) +#define PINMUX_GPIO137__FUNC_SCP_SCL1 (MTK_PIN_NO(137) | 5) + +#define PINMUX_GPIO138__FUNC_GPIO138 (MTK_PIN_NO(138) | 0) +#define PINMUX_GPIO138__FUNC_SDA5 (MTK_PIN_NO(138) | 1) +#define PINMUX_GPIO138__FUNC_URXD2 (MTK_PIN_NO(138) | 2) +#define PINMUX_GPIO138__FUNC_URTS1 (MTK_PIN_NO(138) | 3) +#define PINMUX_GPIO138__FUNC_SCP_SDA0 (MTK_PIN_NO(138) | 4) +#define PINMUX_GPIO138__FUNC_SCP_SDA1 (MTK_PIN_NO(138) | 5) + +#define PINMUX_GPIO139__FUNC_GPIO139 (MTK_PIN_NO(139) | 0) +#define PINMUX_GPIO139__FUNC_SCL6 (MTK_PIN_NO(139) | 1) +#define PINMUX_GPIO139__FUNC_UTXD1 (MTK_PIN_NO(139) | 2) +#define PINMUX_GPIO139__FUNC_TP_UTXD1_AO (MTK_PIN_NO(139) | 3) +#define PINMUX_GPIO139__FUNC_SCP_SCL0 (MTK_PIN_NO(139) | 4) +#define PINMUX_GPIO139__FUNC_SCP_SCL1 (MTK_PIN_NO(139) | 5) +#define PINMUX_GPIO139__FUNC_DBG_MON_B12 (MTK_PIN_NO(139) | 7) + +#define PINMUX_GPIO140__FUNC_GPIO140 (MTK_PIN_NO(140) | 0) +#define PINMUX_GPIO140__FUNC_SDA6 (MTK_PIN_NO(140) | 1) +#define PINMUX_GPIO140__FUNC_URXD1 (MTK_PIN_NO(140) | 2) +#define PINMUX_GPIO140__FUNC_TP_URXD1_AO (MTK_PIN_NO(140) | 3) +#define PINMUX_GPIO140__FUNC_SCP_SDA0 (MTK_PIN_NO(140) | 4) +#define PINMUX_GPIO140__FUNC_SCP_SDA1 (MTK_PIN_NO(140) | 5) +#define PINMUX_GPIO140__FUNC_DBG_MON_B13 (MTK_PIN_NO(140) | 7) + +#define PINMUX_GPIO141__FUNC_GPIO141 (MTK_PIN_NO(141) | 0) +#define PINMUX_GPIO141__FUNC_SCL7 (MTK_PIN_NO(141) | 1) +#define PINMUX_GPIO141__FUNC_URTS0 (MTK_PIN_NO(141) | 2) +#define PINMUX_GPIO141__FUNC_TP_URTS1_AO (MTK_PIN_NO(141) | 3) +#define PINMUX_GPIO141__FUNC_SCP_SCL0 (MTK_PIN_NO(141) | 4) +#define PINMUX_GPIO141__FUNC_SCP_SCL1 (MTK_PIN_NO(141) | 5) +#define PINMUX_GPIO141__FUNC_UDI_TCK (MTK_PIN_NO(141) | 6) +#define PINMUX_GPIO141__FUNC_DBG_MON_B14 (MTK_PIN_NO(141) | 7) + +#define PINMUX_GPIO142__FUNC_GPIO142 (MTK_PIN_NO(142) | 0) +#define PINMUX_GPIO142__FUNC_SDA7 (MTK_PIN_NO(142) | 1) +#define PINMUX_GPIO142__FUNC_UCTS0 (MTK_PIN_NO(142) | 2) +#define PINMUX_GPIO142__FUNC_TP_UCTS1_AO (MTK_PIN_NO(142) | 3) +#define PINMUX_GPIO142__FUNC_SCP_SDA0 (MTK_PIN_NO(142) | 4) +#define PINMUX_GPIO142__FUNC_SCP_SDA1 (MTK_PIN_NO(142) | 5) + +#define PINMUX_GPIO143__FUNC_GPIO143 (MTK_PIN_NO(143) | 0) +#define PINMUX_GPIO143__FUNC_SCL8 (MTK_PIN_NO(143) | 1) +#define PINMUX_GPIO143__FUNC_SCP_SCL0 (MTK_PIN_NO(143) | 4) +#define PINMUX_GPIO143__FUNC_SCP_SCL1 (MTK_PIN_NO(143) | 5) +#define PINMUX_GPIO143__FUNC_DBG_MON_B16 (MTK_PIN_NO(143) | 7) + +#define PINMUX_GPIO144__FUNC_GPIO144 (MTK_PIN_NO(144) | 0) +#define PINMUX_GPIO144__FUNC_SDA8 (MTK_PIN_NO(144) | 1) +#define PINMUX_GPIO144__FUNC_SCP_SDA0 (MTK_PIN_NO(144) | 4) +#define PINMUX_GPIO144__FUNC_SCP_SDA1 
(MTK_PIN_NO(144) | 5) +#define PINMUX_GPIO144__FUNC_DBG_MON_B17 (MTK_PIN_NO(144) | 7) + +#define PINMUX_GPIO145__FUNC_GPIO145 (MTK_PIN_NO(145) | 0) +#define PINMUX_GPIO145__FUNC_SCL9 (MTK_PIN_NO(145) | 1) +#define PINMUX_GPIO145__FUNC_CMVREF1 (MTK_PIN_NO(145) | 2) +#define PINMUX_GPIO145__FUNC_GPS_PPS (MTK_PIN_NO(145) | 3) +#define PINMUX_GPIO145__FUNC_SCP_SCL0 (MTK_PIN_NO(145) | 4) +#define PINMUX_GPIO145__FUNC_SCP_SCL1 (MTK_PIN_NO(145) | 5) +#define PINMUX_GPIO145__FUNC_DBG_MON_B18 (MTK_PIN_NO(145) | 7) + +#define PINMUX_GPIO146__FUNC_GPIO146 (MTK_PIN_NO(146) | 0) +#define PINMUX_GPIO146__FUNC_SDA9 (MTK_PIN_NO(146) | 1) +#define PINMUX_GPIO146__FUNC_CMVREF0 (MTK_PIN_NO(146) | 2) +#define PINMUX_GPIO146__FUNC_SCP_SDA0 (MTK_PIN_NO(146) | 4) +#define PINMUX_GPIO146__FUNC_SCP_SDA1 (MTK_PIN_NO(146) | 5) +#define PINMUX_GPIO146__FUNC_DBG_MON_B19 (MTK_PIN_NO(146) | 7) + +#define PINMUX_GPIO147__FUNC_GPIO147 (MTK_PIN_NO(147) | 0) +#define PINMUX_GPIO147__FUNC_CMFLASH0 (MTK_PIN_NO(147) | 1) +#define PINMUX_GPIO147__FUNC_LVTS_SDI (MTK_PIN_NO(147) | 2) +#define PINMUX_GPIO147__FUNC_DPI_DATA12 (MTK_PIN_NO(147) | 3) +#define PINMUX_GPIO147__FUNC_TP_GPIO0_AO (MTK_PIN_NO(147) | 4) +#define PINMUX_GPIO147__FUNC_ANT_SEL3 (MTK_PIN_NO(147) | 5) +#define PINMUX_GPIO147__FUNC_DFD_TCK_XI (MTK_PIN_NO(147) | 6) +#define PINMUX_GPIO147__FUNC_DBG_MON_B20 (MTK_PIN_NO(147) | 7) + +#define PINMUX_GPIO148__FUNC_GPIO148 (MTK_PIN_NO(148) | 0) +#define PINMUX_GPIO148__FUNC_CMFLASH1 (MTK_PIN_NO(148) | 1) +#define PINMUX_GPIO148__FUNC_LVTS_SCF (MTK_PIN_NO(148) | 2) +#define PINMUX_GPIO148__FUNC_DPI_DATA13 (MTK_PIN_NO(148) | 3) +#define PINMUX_GPIO148__FUNC_TP_GPIO1_AO (MTK_PIN_NO(148) | 4) +#define PINMUX_GPIO148__FUNC_ANT_SEL4 (MTK_PIN_NO(148) | 5) +#define PINMUX_GPIO148__FUNC_DFD_TMS (MTK_PIN_NO(148) | 6) +#define PINMUX_GPIO148__FUNC_DBG_MON_B21 (MTK_PIN_NO(148) | 7) + +#define PINMUX_GPIO149__FUNC_GPIO149 (MTK_PIN_NO(149) | 0) +#define PINMUX_GPIO149__FUNC_CMFLASH2 (MTK_PIN_NO(149) | 1) +#define PINMUX_GPIO149__FUNC_CLKM0 (MTK_PIN_NO(149) | 2) +#define PINMUX_GPIO149__FUNC_DPI_DATA14 (MTK_PIN_NO(149) | 3) +#define PINMUX_GPIO149__FUNC_TP_GPIO2_AO (MTK_PIN_NO(149) | 4) +#define PINMUX_GPIO149__FUNC_ANT_SEL5 (MTK_PIN_NO(149) | 5) +#define PINMUX_GPIO149__FUNC_DFD_TDI (MTK_PIN_NO(149) | 6) +#define PINMUX_GPIO149__FUNC_DBG_MON_B22 (MTK_PIN_NO(149) | 7) + +#define PINMUX_GPIO150__FUNC_GPIO150 (MTK_PIN_NO(150) | 0) +#define PINMUX_GPIO150__FUNC_CLKM1 (MTK_PIN_NO(150) | 2) +#define PINMUX_GPIO150__FUNC_DPI_DATA15 (MTK_PIN_NO(150) | 3) +#define PINMUX_GPIO150__FUNC_TP_GPIO3_AO (MTK_PIN_NO(150) | 4) +#define PINMUX_GPIO150__FUNC_ANT_SEL6 (MTK_PIN_NO(150) | 5) +#define PINMUX_GPIO150__FUNC_DFD_TDO (MTK_PIN_NO(150) | 6) +#define PINMUX_GPIO150__FUNC_DBG_MON_B23 (MTK_PIN_NO(150) | 7) + +#define PINMUX_GPIO151__FUNC_GPIO151 (MTK_PIN_NO(151) | 0) +#define PINMUX_GPIO151__FUNC_GPS_L1_ELNA_EN (MTK_PIN_NO(151) | 1) +#define PINMUX_GPIO151__FUNC_CLKM2 (MTK_PIN_NO(151) | 2) +#define PINMUX_GPIO151__FUNC_DPI_DATA16 (MTK_PIN_NO(151) | 3) +#define PINMUX_GPIO151__FUNC_TP_GPIO4_AO (MTK_PIN_NO(151) | 4) +#define PINMUX_GPIO151__FUNC_ANT_SEL7 (MTK_PIN_NO(151) | 5) +#define PINMUX_GPIO151__FUNC_UDI_TMS (MTK_PIN_NO(151) | 6) +#define PINMUX_GPIO151__FUNC_DBG_MON_B24 (MTK_PIN_NO(151) | 7) + +#define PINMUX_GPIO152__FUNC_GPIO152 (MTK_PIN_NO(152) | 0) +#define PINMUX_GPIO152__FUNC_CLKM3 (MTK_PIN_NO(152) | 2) +#define PINMUX_GPIO152__FUNC_DPI_DATA17 (MTK_PIN_NO(152) | 3) +#define PINMUX_GPIO152__FUNC_TP_GPIO5_AO (MTK_PIN_NO(152) | 4) + +#define 
PINMUX_GPIO153__FUNC_GPIO153 (MTK_PIN_NO(153) | 0) +#define PINMUX_GPIO153__FUNC_CONN_TCXOENA_REQ (MTK_PIN_NO(153) | 1) +#define PINMUX_GPIO153__FUNC_DPI_DATA18 (MTK_PIN_NO(153) | 3) +#define PINMUX_GPIO153__FUNC_TP_GPIO6_AO (MTK_PIN_NO(153) | 4) +#define PINMUX_GPIO153__FUNC_UDI_TDI (MTK_PIN_NO(153) | 6) +#define PINMUX_GPIO153__FUNC_DBG_MON_B26 (MTK_PIN_NO(153) | 7) + +#define PINMUX_GPIO154__FUNC_GPIO154 (MTK_PIN_NO(154) | 0) +#define PINMUX_GPIO154__FUNC_PWM0 (MTK_PIN_NO(154) | 1) +#define PINMUX_GPIO154__FUNC_CMVREF2 (MTK_PIN_NO(154) | 2) +#define PINMUX_GPIO154__FUNC_DPI_DATA19 (MTK_PIN_NO(154) | 3) +#define PINMUX_GPIO154__FUNC_TP_GPIO7_AO (MTK_PIN_NO(154) | 4) +#define PINMUX_GPIO154__FUNC_UDI_TDO (MTK_PIN_NO(154) | 6) +#define PINMUX_GPIO154__FUNC_DBG_MON_B27 (MTK_PIN_NO(154) | 7) + +#define PINMUX_GPIO155__FUNC_GPIO155 (MTK_PIN_NO(155) | 0) +#define PINMUX_GPIO155__FUNC_PWM1 (MTK_PIN_NO(155) | 1) +#define PINMUX_GPIO155__FUNC_CMVREF1 (MTK_PIN_NO(155) | 2) +#define PINMUX_GPIO155__FUNC_DPI_DATA20 (MTK_PIN_NO(155) | 3) +#define PINMUX_GPIO155__FUNC_UDI_NTRST (MTK_PIN_NO(155) | 6) +#define PINMUX_GPIO155__FUNC_DBG_MON_B28 (MTK_PIN_NO(155) | 7) + +#define PINMUX_GPIO156__FUNC_GPIO156 (MTK_PIN_NO(156) | 0) +#define PINMUX_GPIO156__FUNC_PWM2 (MTK_PIN_NO(156) | 1) +#define PINMUX_GPIO156__FUNC_CMVREF0 (MTK_PIN_NO(156) | 2) +#define PINMUX_GPIO156__FUNC_DPI_DATA21 (MTK_PIN_NO(156) | 3) + +#define PINMUX_GPIO157__FUNC_GPIO157 (MTK_PIN_NO(157) | 0) +#define PINMUX_GPIO157__FUNC_PWRAP_SPI0_CSN (MTK_PIN_NO(157) | 1) + +#define PINMUX_GPIO158__FUNC_GPIO158 (MTK_PIN_NO(158) | 0) +#define PINMUX_GPIO158__FUNC_PWRAP_SPI0_CK (MTK_PIN_NO(158) | 1) + +#define PINMUX_GPIO159__FUNC_GPIO159 (MTK_PIN_NO(159) | 0) +#define PINMUX_GPIO159__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(159) | 1) +#define PINMUX_GPIO159__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(159) | 2) + +#define PINMUX_GPIO160__FUNC_GPIO160 (MTK_PIN_NO(160) | 0) +#define PINMUX_GPIO160__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(160) | 1) +#define PINMUX_GPIO160__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(160) | 2) + +#define PINMUX_GPIO161__FUNC_GPIO161 (MTK_PIN_NO(161) | 0) +#define PINMUX_GPIO161__FUNC_SRCLKENA0 (MTK_PIN_NO(161) | 1) + +#define PINMUX_GPIO162__FUNC_GPIO162 (MTK_PIN_NO(162) | 0) +#define PINMUX_GPIO162__FUNC_SRCLKENA1 (MTK_PIN_NO(162) | 1) +#define PINMUX_GPIO162__FUNC_DBG_MON_A31 (MTK_PIN_NO(162) | 7) + +#define PINMUX_GPIO163__FUNC_GPIO163 (MTK_PIN_NO(163) | 0) +#define PINMUX_GPIO163__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(163) | 1) +#define PINMUX_GPIO163__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(163) | 2) + +#define PINMUX_GPIO164__FUNC_GPIO164 (MTK_PIN_NO(164) | 0) +#define PINMUX_GPIO164__FUNC_RTC32K_CK (MTK_PIN_NO(164) | 1) + +#define PINMUX_GPIO165__FUNC_GPIO165 (MTK_PIN_NO(165) | 0) +#define PINMUX_GPIO165__FUNC_WATCHDOG (MTK_PIN_NO(165) | 1) + +#define PINMUX_GPIO166__FUNC_GPIO166 (MTK_PIN_NO(166) | 0) +#define PINMUX_GPIO166__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(166) | 1) +#define PINMUX_GPIO166__FUNC_AUD_CLK_MISO (MTK_PIN_NO(166) | 2) +#define PINMUX_GPIO166__FUNC_I2S1_MCK (MTK_PIN_NO(166) | 3) + +#define PINMUX_GPIO167__FUNC_GPIO167 (MTK_PIN_NO(167) | 0) +#define PINMUX_GPIO167__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(167) | 1) +#define PINMUX_GPIO167__FUNC_AUD_SYNC_MISO (MTK_PIN_NO(167) | 2) +#define PINMUX_GPIO167__FUNC_I2S1_BCK (MTK_PIN_NO(167) | 3) + +#define PINMUX_GPIO168__FUNC_GPIO168 (MTK_PIN_NO(168) | 0) +#define PINMUX_GPIO168__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(168) | 1) +#define PINMUX_GPIO168__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(168) | 2) +#define PINMUX_GPIO168__FUNC_I2S1_LRCK 
(MTK_PIN_NO(168) | 3) + +#define PINMUX_GPIO169__FUNC_GPIO169 (MTK_PIN_NO(169) | 0) +#define PINMUX_GPIO169__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(169) | 1) +#define PINMUX_GPIO169__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(169) | 2) +#define PINMUX_GPIO169__FUNC_I2S1_DO (MTK_PIN_NO(169) | 3) + +#define PINMUX_GPIO170__FUNC_GPIO170 (MTK_PIN_NO(170) | 0) +#define PINMUX_GPIO170__FUNC_AUD_CLK_MISO (MTK_PIN_NO(170) | 1) +#define PINMUX_GPIO170__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(170) | 2) +#define PINMUX_GPIO170__FUNC_I2S2_MCK (MTK_PIN_NO(170) | 3) + +#define PINMUX_GPIO171__FUNC_GPIO171 (MTK_PIN_NO(171) | 0) +#define PINMUX_GPIO171__FUNC_AUD_SYNC_MISO (MTK_PIN_NO(171) | 1) +#define PINMUX_GPIO171__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(171) | 2) +#define PINMUX_GPIO171__FUNC_I2S2_BCK (MTK_PIN_NO(171) | 3) + +#define PINMUX_GPIO172__FUNC_GPIO172 (MTK_PIN_NO(172) | 0) +#define PINMUX_GPIO172__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(172) | 1) +#define PINMUX_GPIO172__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(172) | 2) +#define PINMUX_GPIO172__FUNC_I2S2_LRCK (MTK_PIN_NO(172) | 3) +#define PINMUX_GPIO172__FUNC_VOW_DAT_MISO (MTK_PIN_NO(172) | 4) + +#define PINMUX_GPIO173__FUNC_GPIO173 (MTK_PIN_NO(173) | 0) +#define PINMUX_GPIO173__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(173) | 1) +#define PINMUX_GPIO173__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(173) | 2) +#define PINMUX_GPIO173__FUNC_I2S2_DI (MTK_PIN_NO(173) | 3) +#define PINMUX_GPIO173__FUNC_VOW_CLK_MISO (MTK_PIN_NO(173) | 4) + +#define PINMUX_GPIO174__FUNC_GPIO174 (MTK_PIN_NO(174) | 0) +#define PINMUX_GPIO174__FUNC_CONN_TOP_CLK (MTK_PIN_NO(174) | 1) +#define PINMUX_GPIO174__FUNC_AUXIF_CLK (MTK_PIN_NO(174) | 2) +#define PINMUX_GPIO174__FUNC_DFD_TCK_XI (MTK_PIN_NO(174) | 3) +#define PINMUX_GPIO174__FUNC_DBG_MON_B3 (MTK_PIN_NO(174) | 7) + +#define PINMUX_GPIO175__FUNC_GPIO175 (MTK_PIN_NO(175) | 0) +#define PINMUX_GPIO175__FUNC_CONN_TOP_DATA (MTK_PIN_NO(175) | 1) +#define PINMUX_GPIO175__FUNC_AUXIF_ST (MTK_PIN_NO(175) | 2) +#define PINMUX_GPIO175__FUNC_DFD_TMS (MTK_PIN_NO(175) | 3) +#define PINMUX_GPIO175__FUNC_DBG_MON_B15 (MTK_PIN_NO(175) | 7) + +#define PINMUX_GPIO176__FUNC_GPIO176 (MTK_PIN_NO(176) | 0) +#define PINMUX_GPIO176__FUNC_CONN_BT_CLK (MTK_PIN_NO(176) | 1) +#define PINMUX_GPIO176__FUNC_DFD_TDI (MTK_PIN_NO(176) | 3) +#define PINMUX_GPIO176__FUNC_DBG_MON_B2 (MTK_PIN_NO(176) | 7) + +#define PINMUX_GPIO177__FUNC_GPIO177 (MTK_PIN_NO(177) | 0) +#define PINMUX_GPIO177__FUNC_CONN_BT_DATA (MTK_PIN_NO(177) | 1) +#define PINMUX_GPIO177__FUNC_DFD_TDO (MTK_PIN_NO(177) | 3) + +#define PINMUX_GPIO178__FUNC_GPIO178 (MTK_PIN_NO(178) | 0) +#define PINMUX_GPIO178__FUNC_CONN_HRST_B (MTK_PIN_NO(178) | 1) +#define PINMUX_GPIO178__FUNC_UDI_TMS (MTK_PIN_NO(178) | 3) +#define PINMUX_GPIO178__FUNC_DBG_MON_B25 (MTK_PIN_NO(178) | 7) + +#define PINMUX_GPIO179__FUNC_GPIO179 (MTK_PIN_NO(179) | 0) +#define PINMUX_GPIO179__FUNC_CONN_WB_PTA (MTK_PIN_NO(179) | 1) +#define PINMUX_GPIO179__FUNC_UDI_TCK (MTK_PIN_NO(179) | 3) +#define PINMUX_GPIO179__FUNC_DBG_MON_B29 (MTK_PIN_NO(179) | 7) + +#define PINMUX_GPIO180__FUNC_GPIO180 (MTK_PIN_NO(180) | 0) +#define PINMUX_GPIO180__FUNC_CONN_WF_CTRL0 (MTK_PIN_NO(180) | 1) +#define PINMUX_GPIO180__FUNC_UDI_TDI (MTK_PIN_NO(180) | 3) + +#define PINMUX_GPIO181__FUNC_GPIO181 (MTK_PIN_NO(181) | 0) +#define PINMUX_GPIO181__FUNC_CONN_WF_CTRL1 (MTK_PIN_NO(181) | 1) +#define PINMUX_GPIO181__FUNC_UDI_TDO (MTK_PIN_NO(181) | 3) + +#define PINMUX_GPIO182__FUNC_GPIO182 (MTK_PIN_NO(182) | 0) +#define PINMUX_GPIO182__FUNC_CONN_WF_CTRL2 (MTK_PIN_NO(182) | 1) +#define PINMUX_GPIO182__FUNC_UDI_NTRST (MTK_PIN_NO(182) | 3) 
+ +#define PINMUX_GPIO183__FUNC_GPIO183 (MTK_PIN_NO(183) | 0) +#define PINMUX_GPIO183__FUNC_SPMI_SCL (MTK_PIN_NO(183) | 1) + +#define PINMUX_GPIO184__FUNC_GPIO184 (MTK_PIN_NO(184) | 0) +#define PINMUX_GPIO184__FUNC_SPMI_SDA (MTK_PIN_NO(184) | 1) + +#endif /* __MT8186_PINFUNC_H */ -- cgit v1.2.3 From 5c9bf3635b66add7d829b4d8d538ae0d770d9d89 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 11 Mar 2022 20:23:49 +0800 Subject: scsi: libsas: Add sas_execute_internal_abort_single() The internal abort feature is common to hisi_sas and pm8001 HBAs, and the driver support is similar also, so add a common handler. Two modes of operation will be supported: - single: Abort a single tagged command - device: Abort all commands associated with a specific domain device A new protocol is added, SAS_PROTOCOL_INTERNAL_ABORT, so the common queue command API may be re-used. Only add "single" support as a first step. Link: https://lore.kernel.org/r/1647001432-239276-2-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Acked-by: Jack Wang Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 14 ++++++++++++++ include/scsi/sas.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index df2c8fc43429..2d30d57916e5 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -557,6 +557,16 @@ struct sas_ata_task { int force_phy_id; }; +/* LLDDs rely on these values */ +enum sas_internal_abort { + SAS_INTERNAL_ABORT_SINGLE = 0, +}; + +struct sas_internal_abort_task { + enum sas_internal_abort type; + u16 tag; +}; + struct sas_smp_task { struct scatterlist smp_req; struct scatterlist smp_resp; @@ -596,6 +606,7 @@ struct sas_task { struct sas_ata_task ata_task; struct sas_smp_task smp_task; struct sas_ssp_task ssp_task; + struct sas_internal_abort_task abort_task; }; struct scatterlist *scatter; @@ -683,6 +694,9 @@ extern int sas_slave_configure(struct scsi_device *); extern int sas_change_queue_depth(struct scsi_device *, int new_depth); extern int sas_bios_param(struct scsi_device *, struct block_device *, sector_t capacity, int *hsc); +int sas_execute_internal_abort_single(struct domain_device *device, + u16 tag, unsigned int qid, + void *data); extern struct scsi_transport_template * sas_domain_attach_transport(struct sas_domain_function_template *); extern struct device_attribute dev_attr_phy_event_threshold; diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 332a463d08ef..acfc69fd72d0 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -95,6 +95,8 @@ enum sas_protocol { SAS_PROTOCOL_SSP = 0x08, SAS_PROTOCOL_ALL = 0x0E, SAS_PROTOCOL_STP_ALL = SAS_PROTOCOL_STP|SAS_PROTOCOL_SATA, + /* these are internal to libsas */ + SAS_PROTOCOL_INTERNAL_ABORT = 0x10, }; /* From the spec; local phys only */ -- cgit v1.2.3 From 6a91c3e31578979a93b500efc76af4d3499f75ad Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 11 Mar 2022 20:23:50 +0800 Subject: scsi: libsas: Add sas_execute_internal_abort_dev() Add support for a "device" variant of internal abort, which will abort all pending I/Os for a specific device. Link: https://lore.kernel.org/r/1647001432-239276-3-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Acked-by: Jack Wang Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen --- include/scsi/libsas.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 2d30d57916e5..71f632b2d2bd 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -560,6 +560,7 @@ struct sas_ata_task { /* LLDDs rely on these values */ enum sas_internal_abort { SAS_INTERNAL_ABORT_SINGLE = 0, + SAS_INTERNAL_ABORT_DEV = 1, }; struct sas_internal_abort_task { @@ -641,6 +642,11 @@ extern struct sas_task *sas_alloc_task(gfp_t flags); extern struct sas_task *sas_alloc_slow_task(gfp_t flags); extern void sas_free_task(struct sas_task *task); +static inline bool sas_is_internal_abort(struct sas_task *task) +{ + return task->task_proto == SAS_PROTOCOL_INTERNAL_ABORT; +} + struct sas_domain_function_template { /* The class calls these to notify the LLDD of an event. */ void (*lldd_port_formed)(struct asd_sas_phy *); @@ -697,6 +703,8 @@ extern int sas_bios_param(struct scsi_device *, struct block_device *, int sas_execute_internal_abort_single(struct domain_device *device, u16 tag, unsigned int qid, void *data); +int sas_execute_internal_abort_dev(struct domain_device *device, + unsigned int qid, void *data); extern struct scsi_transport_template * sas_domain_attach_transport(struct sas_domain_function_template *); extern struct device_attribute dev_attr_phy_event_threshold; -- cgit v1.2.3 From 095478a6e5bf590f2bbf341569eb25173c9c5f32 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 11 Mar 2022 20:23:52 +0800 Subject: scsi: hisi_sas: Use libsas internal abort support Use the common libsas internal abort functionality. In addition, this driver has special handling for internal abort timeouts - specifically whether to reset the controller in that instance, so extend the API for that. Timeout is now increased to 20 * Hz from 6 * Hz. We also retry for failure now, but this should not make a difference. Link: https://lore.kernel.org/r/1647001432-239276-5-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Acked-by: Jack Wang Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 71f632b2d2bd..ff04eb6d250b 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -565,6 +565,7 @@ enum sas_internal_abort { struct sas_internal_abort_task { enum sas_internal_abort type; + unsigned int qid; u16 tag; }; @@ -671,6 +672,7 @@ struct sas_domain_function_template { /* Special TMF callbacks */ void (*lldd_tmf_exec_complete)(struct domain_device *dev); void (*lldd_tmf_aborted)(struct sas_task *task); + bool (*lldd_abort_timeout)(struct sas_task *task, void *data); /* Port and Adapter management */ int (*lldd_clear_nexus_port)(struct asd_sas_port *); -- cgit v1.2.3 From d72d827f2f2636d8d72f0f3ebe5b661c9a24d343 Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Tue, 1 Mar 2022 15:55:00 +0800 Subject: scsi: target: Add iscsi/cpus_allowed_list in configfs The RX/TX threads for iSCSI connection can be scheduled to any online CPUs, and will not be rescheduled. When binding other heavy load threads along with iSCSI connection RX/TX thread to the same CPU, the iSCSI performance will be worse. Add iscsi/cpus_allowed_list in configfs. The available CPU set of iSCSI connection RX/TX threads is allowed_cpus & online_cpus. If it is modified, all RX/TX threads will be rescheduled. 
Link: https://lore.kernel.org/r/20220301075500.14266-1-mingzhe.zou@easystack.cn Reviewed-by: Mike Christie Signed-off-by: Mingzhe Zou Signed-off-by: Martin K. Petersen --- include/target/iscsi/iscsi_target_core.h | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 1eccb2ac7d02..adc87de0362b 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -580,6 +580,7 @@ struct iscsi_conn { struct ahash_request *conn_tx_hash; /* Used for scheduling TX and RX connection kthreads */ cpumask_var_t conn_cpumask; + cpumask_var_t allowed_cpumask; unsigned int conn_rx_reset_cpumask:1; unsigned int conn_tx_reset_cpumask:1; /* list_head of struct iscsi_cmd for this connection */ @@ -878,6 +879,7 @@ struct iscsit_global { /* Thread Set bitmap pointer */ unsigned long *ts_bitmap; spinlock_t ts_bitmap_lock; + cpumask_var_t allowed_cpumask; /* Used for iSCSI discovery session authentication */ struct iscsi_node_acl discovery_acl; struct iscsi_portal_group *discovery_tpg; @@ -898,29 +900,8 @@ static inline u32 session_get_next_ttt(struct iscsi_session *session) extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, itt_t); -static inline void iscsit_thread_check_cpumask( - struct iscsi_conn *conn, - struct task_struct *p, - int mode) -{ - /* - * mode == 1 signals iscsi_target_tx_thread() usage. - * mode == 0 signals iscsi_target_rx_thread() usage. - */ - if (mode == 1) { - if (!conn->conn_tx_reset_cpumask) - return; - conn->conn_tx_reset_cpumask = 0; - } else { - if (!conn->conn_rx_reset_cpumask) - return; - conn->conn_rx_reset_cpumask = 0; - } - /* - * Update the CPU mask for this single kthread so that - * both TX and RX kthreads are scheduled to run on the - * same CPU. - */ - set_cpus_allowed_ptr(p, conn->conn_cpumask); -} +extern void iscsit_thread_check_cpumask(struct iscsi_conn *conn, + struct task_struct *p, + int mode); + #endif /* ISCSI_TARGET_CORE_H */ -- cgit v1.2.3 From 45ceaf14d53a123e5955477da501bc6f26b99039 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 14 Mar 2022 19:45:37 -0700 Subject: Input: extract ChromeOS vivaldi physmap show function Let's introduce a common library file for the physmap show function duplicated between three different keyboard drivers. This largely copies the code from cros_ec_keyb.c which has the most recent version of the show function, while using the vivaldi_data struct from the hid-vivaldi driver. This saves a small amount of space in an allyesconfig build. 
$ ./scripts/bloat-o-meter vmlinux.before vmlinux.after add/remove: 3/0 grow/shrink: 2/3 up/down: 412/-720 (-308) Function old new delta vivaldi_function_row_physmap_show - 292 +292 _sub_I_65535_1 1057564 1057616 +52 _sub_D_65535_0 1057564 1057616 +52 e843419@49f2_00062737_9b04 - 8 +8 e843419@20f6_0002a34d_35bc - 8 +8 atkbd_parse_fwnode_data 480 472 -8 atkbd_do_show_function_row_physmap 316 76 -240 function_row_physmap_show 620 148 -472 Total: Before=285581925, After=285581617, chg -0.00% Signed-off-by: Stephen Boyd Tested-by: Stephen Boyd # coachz, wormdingler Link: https://lore.kernel.org/r/20220228075446.466016-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/input/vivaldi-fmap.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/input/vivaldi-fmap.h (limited to 'include') diff --git a/include/linux/input/vivaldi-fmap.h b/include/linux/input/vivaldi-fmap.h new file mode 100644 index 000000000000..7e4b7023bf04 --- /dev/null +++ b/include/linux/input/vivaldi-fmap.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VIVALDI_FMAP_H +#define _VIVALDI_FMAP_H + +#include + +#define VIVALDI_MAX_FUNCTION_ROW_KEYS 24 + +/** + * struct vivaldi_data - Function row map data for ChromeOS Vivaldi keyboards + * @function_row_physmap: An array of scancodes or their equivalent (HID usage + * codes, encoded rows/columns, etc) for the top + * row function keys, in an order from left to right + * @num_function_row_keys: The number of top row keys in a custom keyboard + * + * This structure is supposed to be used by ChromeOS keyboards using + * the Vivaldi keyboard function row design. + */ +struct vivaldi_data { + u32 function_row_physmap[VIVALDI_MAX_FUNCTION_ROW_KEYS]; + unsigned int num_function_row_keys; +}; + +ssize_t vivaldi_function_row_physmap_show(const struct vivaldi_data *data, + char *buf); + +#endif /* _VIVALDI_FMAP_H */ -- cgit v1.2.3 From ad515cada7dac3cdf5e1ad77a0ed696f5f34e0ab Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 9 Mar 2022 20:57:57 -0500 Subject: scsi: iscsi: Add helper functions to manage iscsi_cls_conn - iscsi_alloc_conn(): Allocate and initialize iscsi_cls_conn - iscsi_add_conn(): Expose iscsi_cls_conn to userspace via sysfs - iscsi_remove_conn(): Remove iscsi_cls_conn from sysfs Link: https://lore.kernel.org/r/20220310015759.3296841-2-haowenchao@huawei.com Reviewed-by: Mike Christie Signed-off-by: Wenchao Hao Signed-off-by: Wu Bo Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_transport_iscsi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 7a0d24d3b916..ad341835e847 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -442,6 +442,10 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, unsigned int target_id); extern void iscsi_remove_session(struct iscsi_cls_session *session); extern void iscsi_free_session(struct iscsi_cls_session *session); +extern struct iscsi_cls_conn *iscsi_alloc_conn(struct iscsi_cls_session *sess, + int dd_size, uint32_t cid); +extern int iscsi_add_conn(struct iscsi_cls_conn *conn); +extern void iscsi_remove_conn(struct iscsi_cls_conn *conn); extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, int dd_size, uint32_t cid); extern void iscsi_put_conn(struct iscsi_cls_conn *conn); -- cgit v1.2.3 From 7dae459f5e56a89ab01413ae055595c982713349 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 9 Mar 2022 20:57:58 -0500 Subject: scsi: libiscsi: Add iscsi_cls_conn to sysfs after initialization iscsi_create_conn() exposed iscsi_cls_conn to sysfs prior to initialization of iscsi_conn's dd_data. When userspace tried to access an attribute such as the connect address, a NULL pointer dereference was observed. Do not add iscsi_cls_conn to sysfs until it has been initialized. Remove iscsi_create_conn() since it is no longer used. Link: https://lore.kernel.org/r/20220310015759.3296841-3-haowenchao@huawei.com Reviewed-by: Mike Christie Signed-off-by: Wenchao Hao Signed-off-by: Wu Bo Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_iscsi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index ad341835e847..97e947962b52 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -446,8 +446,6 @@ extern struct iscsi_cls_conn *iscsi_alloc_conn(struct iscsi_cls_session *sess, int dd_size, uint32_t cid); extern int iscsi_add_conn(struct iscsi_cls_conn *conn); extern void iscsi_remove_conn(struct iscsi_cls_conn *conn); -extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, - int dd_size, uint32_t cid); extern void iscsi_put_conn(struct iscsi_cls_conn *conn); extern void iscsi_get_conn(struct iscsi_cls_conn *conn); extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); -- cgit v1.2.3 From 8709c323091be019f76a49cf783052a5636aca85 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 9 Mar 2022 20:57:59 -0500 Subject: scsi: libiscsi: Teardown iscsi_cls_conn gracefully Commit 1b8d0300a3e9 ("scsi: libiscsi: Fix UAF in iscsi_conn_get_param()/iscsi_conn_teardown()") fixed a UAF in iscsi_conn_get_param() and introduced two tmp_xxx variables. We can gracefully fix this UAF with the help of device_del(). Calling iscsi_remove_conn() at the beginning of iscsi_conn_teardown() makes userspace unable to see iscsi_cls_conn. This way we can free memory safely. Remove iscsi_destroy_conn() since it is no longer used. Link: https://lore.kernel.org/r/20220310015759.3296841-4-haowenchao@huawei.com Reviewed-by: Mike Christie Signed-off-by: Wenchao Hao Signed-off-by: Wu Bo Signed-off-by: Martin K.
Petersen --- include/scsi/scsi_transport_iscsi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 97e947962b52..38e4a67f5922 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -448,7 +448,6 @@ extern int iscsi_add_conn(struct iscsi_cls_conn *conn); extern void iscsi_remove_conn(struct iscsi_cls_conn *conn); extern void iscsi_put_conn(struct iscsi_cls_conn *conn); extern void iscsi_get_conn(struct iscsi_cls_conn *conn); -extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); extern void iscsi_unblock_session(struct iscsi_cls_session *session); extern void iscsi_block_session(struct iscsi_cls_session *session); extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size); -- cgit v1.2.3 From b44544fe0298ee2224960a31f795e317029e2a60 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:12 +0100 Subject: static_call: Avoid building empty .static_call_sites Without CONFIG_HAVE_STATIC_CALL_INLINE there's no point in creating the .static_call_sites section and it's related symbols. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.223798256@infradead.org --- include/asm-generic/vmlinux.lds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca69..a41e62355160 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -404,6 +404,7 @@ KEEP(*(__jump_table)) \ __stop___jump_table = .; +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE #define STATIC_CALL_DATA \ . = ALIGN(8); \ __start_static_call_sites = .; \ @@ -412,6 +413,9 @@ __start_static_call_tramp_key = .; \ KEEP(*(.static_call_tramp_key)) \ __stop_static_call_tramp_key = .; +#else +#define STATIC_CALL_DATA +#endif /* * Allow architectures to handle ro_after_init data on their -- cgit v1.2.3 From c8c301abeae58ec756b8fcb2178a632bd3c9e284 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:18 +0100 Subject: x86/ibt: Add ANNOTATE_NOENDBR In order to have objtool warn about code references to !ENDBR instruction, we need an annotation to allow this for non-control-flow instances -- consider text range checks, text patching, or return trampolines etc. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.578968224@infradead.org --- include/linux/objtool.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index aca52db2f3f3..f797368820c8 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -77,6 +77,12 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD_FP(func) #endif +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -129,6 +135,13 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -139,12 +152,15 @@ struct unwind_hint { "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm +.macro ANNOTATE_NOENDBR +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From cc66bb91457827f62e2b6cb2518666820f0a6c48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:32 +0100 Subject: x86/ibt,kprobes: Cure sym+0 equals fentry woes In order to allow kprobes to skip the ENDBR instructions at sym+0 for X86_KERNEL_IBT builds, change _kprobe_addr() to take an architecture callback to inspect the function at hand and modify the offset if needed. This streamlines the existing interface to cover more cases and require less hooks. Once PowerPC gets fully converted there will only be the one arch hook. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.405947704@infradead.org --- include/linux/kprobes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 19b884353b15..9c28f7a0ef42 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,7 +265,6 @@ extern int arch_init_kprobes(void); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -extern bool arch_kprobe_on_func_entry(unsigned long offset); extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); @@ -384,6 +383,8 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) } kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry); + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); -- cgit v1.2.3 From cb9010f87dcbcdbb51cc96b922c6260848cecbd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:44 +0100 Subject: x86/ibt: Ensure module init/exit points have references Since the references to the module init/exit points only have external references, a module LTO run will consider them 'unused' and seal them, leading to an immediate fail on module load. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.113767246@infradead.org --- include/linux/cfi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 879744aaa6e0..c6dfc1ed0626 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -34,8 +34,17 @@ static inline void cfi_module_remove(struct module *mod, unsigned long base_addr #else /* !CONFIG_CFI_CLANG */ -#define __CFI_ADDRESSABLE(fn, __attr) +#ifdef CONFIG_X86_KERNEL_IBT + +#define __CFI_ADDRESSABLE(fn, __attr) \ + const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn + +#endif /* CONFIG_X86_KERNEL_IBT */ #endif /* CONFIG_CFI_CLANG */ +#ifndef __CFI_ADDRESSABLE +#define __CFI_ADDRESSABLE(fn, __attr) +#endif + #endif /* _LINUX_CFI_H */ -- cgit v1.2.3 From eae654f1c21216daa9fbb92591c0d9f5ae46cfc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:48 +0100 Subject: exit: Mark do_group_exit() __noreturn vmlinux.o: warning: objtool: get_signal()+0x108: unreachable instruction 0000 000000000007f930 : ... 0103 7fa33: e8 00 00 00 00 call 7fa38 7fa34: R_X86_64_PLT32 do_group_exit-0x4 0108 7fa38: 41 8b 45 74 mov 0x74(%r13),%eax Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.351270711@infradead.org --- include/linux/sched/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e84e54d1b490..719c9a6cac8d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -79,7 +79,7 @@ static inline void exit_thread(struct task_struct *tsk) { } #endif -extern void do_group_exit(int); +extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -- cgit v1.2.3 From 105cd68596392cfe15056a891b0723609dcad247 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 17:58:35 +0100 Subject: x86: Mark __invalid_creds() __noreturn vmlinux.o: warning: objtool: ksys_unshare()+0x36c: unreachable instruction 0000 0000000000067040 : ... 0364 673a4: 4c 89 ef mov %r13,%rdi 0367 673a7: e8 00 00 00 00 call 673ac 673a8: R_X86_64_PLT32 __invalid_creds-0x4 036c 673ac: e9 28 ff ff ff jmp 672d9 0371 673b1: 41 bc f4 ff ff ff mov $0xfffffff4,%r12d 0377 673b7: e9 80 fd ff ff jmp 6713c Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- include/linux/cred.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index fcbc6885cc09..9ed9232af934 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,7 +176,7 @@ extern int set_cred_ucounts(struct cred *); * check for validity of credentials */ #ifdef CONFIG_DEBUG_CREDENTIALS -extern void __invalid_creds(const struct cred *, const char *, unsigned); +extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); -- cgit v1.2.3 From dca5da2abe406168b85f97e22109710ebe0bda08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 18:05:52 +0100 Subject: x86,objtool: Move the ASM_REACHABLE annotation to objtool.h Because we need a variant for .S files too. 
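As a rough illustration (an editor's sketch, not part of this patch), the C string form is pasted into inline asm after an instruction that objtool would otherwise treat as ending control flow, much as the WARN() machinery does around its trapping instruction; the surrounding function and the bare ud2 here are hypothetical and assume the trap is actually handled so execution resumes:

#include <linux/objtool.h>

/* Hedged sketch, not from the patch: annotate that execution resumes
 * after the trap so objtool keeps following the instruction stream
 * (the real WARN() path arranges the trap handling via its bug table). */
static inline void example_recoverable_trap(void)
{
	asm volatile("ud2\n\t"		/* trapping instruction */
		     ASM_REACHABLE);	/* objtool: the next insn is reachable */
}
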
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- include/linux/compiler.h | 7 ------- include/linux/objtool.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0f7fd205ab7e..219aa5ddbc73 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -125,18 +125,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_REACHABLE \ - "998:\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ - ".popsection\n\t" - /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else #define annotate_unreachable() -# define ASM_REACHABLE #define __annotate_jump_table #endif diff --git a/include/linux/objtool.h b/include/linux/objtool.h index f797368820c8..586d35720f13 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -83,6 +83,12 @@ struct unwind_hint { _ASM_PTR " 986b\n\t" \ ".popsection\n\t" +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -142,6 +148,13 @@ struct unwind_hint { .popsection .endm +.macro REACHABLE +.Lhere_\@: + .pushsection .discard.reachable + .long .Lhere_\@ - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -153,6 +166,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) #define ANNOTATE_NOENDBR +#define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 @@ -161,6 +175,8 @@ struct unwind_hint { .endm .macro ANNOTATE_NOENDBR .endm +.macro REACHABLE +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From 2b3171c6fe0af24b5506e061525e08917a2f744a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Feb 2022 12:54:58 +0100 Subject: mac80211: MBSSID beacon handling in AP mode Add new fields in struct beacon_data to store all MBSSID elements. Generate a beacon template which includes all MBSSID elements. Move CSA offset to reflect the MBSSID element length. Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Co-developed-by: John Crispin Signed-off-by: John Crispin Signed-off-by: Lorenzo Bianconi Tested-by: Money Wang Link: https://lore.kernel.org/r/5322db3c303f431adaf191ab31c45e151dde5465.1645702516.git.lorenzo@kernel.org [small cleanups] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b8e8c82b53aa..382ebb862ea8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4948,12 +4948,14 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets * to countdown counters. This array can contain zero values which * should be ignored. 
+ * @mbssid_off: position of the multiple bssid element */ struct ieee80211_mutable_offsets { u16 tim_offset; u16 tim_length; u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; + u16 mbssid_off; }; /** -- cgit v1.2.3 From 5ad6b2bdaaea712486145fa5a78ec24d25289071 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:28 +0000 Subject: fs: Turn do_invalidatepage() into folio_invalidate() Take a folio instead of a page, fix the types of the offset & length, and export it to filesystems. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/mm.h | 3 --- include/linux/pagemap.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 213cc569b192..7808a7959066 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1939,9 +1939,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); -extern void do_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); - bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 55a80d8f0e9c..4503d5baa252 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -893,6 +893,7 @@ static inline void cancel_dirty_page(struct page *page) } bool folio_clear_dirty_for_io(struct folio *folio); bool clear_page_dirty_for_io(struct page *page); +void folio_invalidate(struct folio *folio, size_t offset, size_t length); int __must_check folio_write_one(struct folio *folio); static inline int __must_check write_one_page(struct page *page) { -- cgit v1.2.3 From 128d1f8241d62ab014eef6dd4ef9bb977dbeadb2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:32 +0000 Subject: fs: Add invalidate_folio() aops method This is used in preference to invalidatepage, if defined. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5939e6694ada..bcdb613cd652 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -387,6 +387,7 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t offset, size_t len); void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); -- cgit v1.2.3 From d82354f6b05fc3b35029b3f75ddbf41b82af3bc8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:33 +0000 Subject: iomap: Remove iomap_invalidatepage() Use iomap_invalidate_folio() in all the iomap-based filesystems and rename the iomap_invalidatepage tracepoint. 
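To make the conversion concrete, here is a minimal sketch (mine, not from this patch) of how an iomap-based filesystem now wires the helper straight into its address_space_operations; "myfs" is a made-up name and only helpers visible in these hunks are used:

#include <linux/fs.h>
#include <linux/iomap.h>

/* Hypothetical sketch: point ->invalidate_folio at the iomap helper
 * instead of wrapping the removed iomap_invalidatepage(). */
static const struct address_space_operations myfs_aops = {
	.invalidate_folio	= iomap_invalidate_folio,
	.releasepage		= iomap_releasepage,
	/* remaining methods omitted */
};
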
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/iomap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3bcbb264f83f..b76f0dd149fb 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -230,8 +230,6 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); -void iomap_invalidatepage(struct page *page, unsigned int offset, - unsigned int len); #ifdef CONFIG_MIGRATION int iomap_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); -- cgit v1.2.3 From 7ba13abbd31ee9265e88d7dc029c0f786e665192 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:34 +0000 Subject: fs: Turn block_invalidatepage into block_invalidate_folio Remove special-casing of a NULL invalidatepage, since there is no more block_invalidatepage. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/buffer_head.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 79d465057889..9ee9d003d736 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,8 +217,7 @@ extern int buffer_heads_over_limit; * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ -void block_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); +void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int __block_write_full_page(struct inode *inode, struct page *page, -- cgit v1.2.3 From 5660a8630dab61a28e07ec00c42bf605b182d725 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:35 +0000 Subject: fs: Remove noop_invalidatepage() We used to have to use noop_invalidatepage() to prevent block_invalidatepage() from being called, but that behaviour is now gone. 
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bcdb613cd652..a40ea82248da 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3323,8 +3323,6 @@ extern int simple_rename(struct user_namespace *, struct inode *, extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); -extern void noop_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, -- cgit v1.2.3 From ccd16945dba091fdf1036d7711b9f6cbd287ae28 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:43 +0000 Subject: ext4: Convert invalidatepage to invalidate_folio Extensive changes, but fairly mechanical. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/jbd2.h | 4 ++-- include/linux/pagemap.h | 18 ++++++++++++++++++ include/trace/events/ext4.h | 30 +++++++++++++++--------------- 3 files changed, 35 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9c3ada74ffb1..1b9d1e205a2f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1530,8 +1530,8 @@ void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); -extern int jbd2_journal_invalidatepage(journal_t *, - struct page *, unsigned int, unsigned int); +int jbd2_journal_invalidate_folio(journal_t *, struct folio *, + size_t offset, size_t length); extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4503d5baa252..6a9617e9c6bc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -422,6 +422,24 @@ static inline struct folio *filemap_get_folio(struct address_space *mapping, return __filemap_get_folio(mapping, index, 0, 0); } +/** + * filemap_lock_folio - Find and lock a folio. + * @mapping: The address_space to search. + * @index: The page index. + * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned locked with an increased refcount. + * + * Context: May sleep. + * Return: A folio or %NULL if there is no folio in the cache for this + * index. Will not return a shadow, swap or DAX entry. 
+ */ +static inline struct folio *filemap_lock_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, FGP_LOCK, 0); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19e957b7f941..40cca0e5a811 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -597,44 +597,44 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage, TP_ARGS(page) ); -DECLARE_EVENT_CLASS(ext4_invalidatepage_op, - TP_PROTO(struct page *page, unsigned int offset, unsigned int length), +DECLARE_EVENT_CLASS(ext4_invalidate_folio_op, + TP_PROTO(struct folio *folio, size_t offset, size_t length), - TP_ARGS(page, offset, length), + TP_ARGS(folio, offset, length), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) - __field( unsigned int, offset ) - __field( unsigned int, length ) + __field( size_t, offset ) + __field( size_t, length ) ), TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->index = page->index; + __entry->dev = folio->mapping->host->i_sb->s_dev; + __entry->ino = folio->mapping->host->i_ino; + __entry->index = folio->index; __entry->offset = offset; __entry->length = length; ), - TP_printk("dev %d,%d ino %lu page_index %lu offset %u length %u", + TP_printk("dev %d,%d ino %lu folio_index %lu offset %zu length %zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index, __entry->offset, __entry->length) ); -DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, - TP_PROTO(struct page *page, unsigned int offset, unsigned int length), +DEFINE_EVENT(ext4_invalidate_folio_op, ext4_invalidate_folio, + TP_PROTO(struct folio *folio, size_t offset, size_t length), - TP_ARGS(page, offset, length) + TP_ARGS(folio, offset, length) ); -DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, - TP_PROTO(struct page *page, unsigned int offset, unsigned int length), +DEFINE_EVENT(ext4_invalidate_folio_op, ext4_journalled_invalidate_folio, + TP_PROTO(struct folio *folio, size_t offset, size_t length), - TP_ARGS(page, offset, length) + TP_ARGS(folio, offset, length) ); TRACE_EVENT(ext4_discard_blocks, -- cgit v1.2.3 From 6d740c76ea86053208b20c41fb5ec1de07acb996 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:47 +0000 Subject: nfs: Convert from invalidatepage to invalidate_folio Print the folio index instead of the pointer, since this is more useful. We also don't need to use page_file_mapping() as we do not invalidate swapcache pages. Since this is the only caller of nfs_wb_page_cancel(), convert it to nfs_wb_folio_cancel(). 
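For orientation, a condensed sketch of the shape the converted NFS ->invalidate_folio takes (paraphrased by the editor, not the verbatim patch; the nfs_wb_folio_cancel() prototype is the one added in the hunk below):

/* Condensed, paraphrased sketch: partial invalidations are ignored,
 * a whole-folio invalidation cancels any unstarted writeback. */
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{
	if (offset != 0 || length < folio_size(folio))
		return;
	nfs_wb_folio_cancel(folio->mapping->host, folio);
}
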
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 68f81d8d36de..784120cc217e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -583,7 +583,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page *page); -extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); +int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); -- cgit v1.2.3 From f50015a596fa106bf642bd85fbf6e6b52cc913d0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:51 +0000 Subject: fs: Remove aops->invalidatepage With all users migrated to ->invalidate_folio, remove the old operation. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a40ea82248da..af9ae091bd82 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -388,7 +388,6 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t offset, size_t len); - void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); -- cgit v1.2.3 From affa80e8c6a1df473694c2087259901872309cc4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:52 +0000 Subject: fs: Add aops->launder_folio Since the only difference between ->launder_page and ->launder_folio is the type of the pointer, these can safely use a union without affecting bisectability. 
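As an illustration of the new hook (an editor's sketch, not part of the patch), an implementation keeps the same shape as its old ->launder_page and simply takes a folio; "examplefs" and its examplefs_writeback_folio() helper are assumed names:

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Hypothetical sketch: synchronously write back a dirty folio so it can
 * be invalidated; examplefs_writeback_folio() is an assumed helper. */
static int examplefs_launder_folio(struct folio *folio)
{
	if (folio_clear_dirty_for_io(folio))
		return examplefs_writeback_folio(folio);
	return 0;
}

static const struct address_space_operations examplefs_aops = {
	.launder_folio	= examplefs_launder_folio,
	/* remaining methods omitted */
};
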
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index af9ae091bd82..0af3075cdff2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,10 @@ struct address_space_operations { struct page *, struct page *, enum migrate_mode); bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); - int (*launder_page) (struct page *); + union { + int (*launder_page) (struct page *); + int (*launder_folio) (struct folio *); + }; bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct page *, bool *, bool *); -- cgit v1.2.3 From 072acba6d08730beba5bad293c7ce6d0c4b0624c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:59 +0000 Subject: fs: Remove aops->launder_page With all users converted to ->launder_folio, remove ->launder_page. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0af3075cdff2..055be40084f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,10 +399,7 @@ struct address_space_operations { struct page *, struct page *, enum migrate_mode); bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); - union { - int (*launder_page) (struct page *); - int (*launder_folio) (struct folio *); - }; + int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct page *, bool *, bool *); -- cgit v1.2.3 From 6f31a5a261dbbe7bf7f585dfe81f8acd4b25ec3b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:00 +0000 Subject: fs: Add aops->dirty_folio This replaces ->set_page_dirty(). It returns a bool instead of an int and takes the address_space as a parameter instead of expecting the implementations to retrieve the address_space from the page. This is particularly important for filesystems which use FS_OPS for swap. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 055be40084f1..c3d5db8851ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -369,6 +369,7 @@ struct address_space_operations { /* Set a page dirty. Return true if this dirtied it */ int (*set_page_dirty)(struct page *page); + bool (*dirty_folio)(struct address_space *, struct folio *); /* * Reads in the requested pages. Unlike ->readpage(), this is -- cgit v1.2.3 From 8fb72b4a76933ae6f86725cc8e4a8190ba84d755 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:01 +0000 Subject: fscache: Convert fscache_set_page_dirty() to fscache_dirty_folio() Convert all users of fscache_set_page_dirty to use fscache_dirty_folio. 
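A sketch of what a converted caller looks like (not from the patch): a network filesystem's ->dirty_folio becomes a thin wrapper that passes its cookie through, with the fscache_dirty_folio() prototype taken from the hunk below and the cookie lookup helper assumed:

#include <linux/fscache.h>
#include <linux/pagemap.h>

/* Hypothetical sketch: netfs_example_cookie() stands in for however the
 * filesystem finds the inode's fscache cookie. */
static bool netfs_example_dirty_folio(struct address_space *mapping,
				      struct folio *folio)
{
	return fscache_dirty_folio(mapping, folio,
				   netfs_example_cookie(mapping->host));
}
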
Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fscache.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 296c5f1d9f35..d44ff747a657 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -616,9 +616,11 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } #if __fscache_available -extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie); +bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, + struct fscache_cookie *cookie); #else -#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE))) +#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \ + filemap_dirty_folio(MAPPING, FOLIO) #endif /** @@ -626,7 +628,7 @@ extern int fscache_set_page_dirty(struct page *page, struct fscache_cook * @wbc: The writeback control * @cookie: The cookie referring to the cache object * - * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is + * Unpin the writeback resources pinned by fscache_dirty_folio(). This is * intended to be called by the netfs's ->write_inode() method. */ static inline void fscache_unpin_writeback(struct writeback_control *wbc, -- cgit v1.2.3 From 7e63df00cf5e609ebbee5ffbc3df1900d8a4443c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:10 +0000 Subject: mm: Convert swap_set_page_dirty() to swap_dirty_folio() Straightforward conversion. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d..65a37e555124 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -427,7 +427,7 @@ extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func); -extern int swap_set_page_dirty(struct page *page); +bool swap_dirty_folio(struct address_space *mapping, struct folio *folio); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); -- cgit v1.2.3 From 938d3480b92fa5e454b7734294f12a7b75126f09 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 4 Mar 2022 16:11:42 +0800 Subject: bpf, sockmap: Fix memleak in sk_psock_queue_msg If tcp_bpf_sendmsg is running during a tear down operation we may enqueue data on the ingress msg queue while tear down is trying to free it. sk1 (redirect sk2) sk2 ------------------- --------------- tcp_bpf_sendmsg() tcp_bpf_send_verdict() tcp_bpf_sendmsg_redir() bpf_tcp_ingress() sock_map_close() lock_sock() lock_sock() ... blocking sk_psock_stop sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); release_sock(sk); lock_sock() sk_mem_charge() get_page() sk_psock_queue_msg() sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED); drop_sk_msg() release_sock() In drop_sk_msg(), the msg has memory charged from sk by sk_mem_charge() and holds sg pages that need to be put. To fix this, use sk_msg_free() and then kfree() the msg.
This issue can cause the following info: WARNING: CPU: 0 PID: 9202 at net/core/stream.c:205 sk_stream_kill_queues+0xc8/0xe0 Call Trace: inet_csk_destroy_sock+0x55/0x110 tcp_rcv_state_process+0xe5f/0xe90 ? sk_filter_trim_cap+0x10d/0x230 ? tcp_v4_do_rcv+0x161/0x250 tcp_v4_do_rcv+0x161/0x250 tcp_v4_rcv+0xc3a/0xce0 ip_protocol_deliver_rcu+0x3d/0x230 ip_local_deliver_finish+0x54/0x60 ip_local_deliver+0xfd/0x110 ? ip_protocol_deliver_rcu+0x230/0x230 ip_rcv+0xd6/0x100 ? ip_local_deliver+0x110/0x110 __netif_receive_skb_one_core+0x85/0xa0 process_backlog+0xa4/0x160 __napi_poll+0x29/0x1b0 net_rx_action+0x287/0x300 __do_softirq+0xff/0x2fc do_softirq+0x79/0x90 WARNING: CPU: 0 PID: 531 at net/ipv4/af_inet.c:154 inet_sock_destruct+0x175/0x1b0 Call Trace: __sk_destruct+0x24/0x1f0 sk_psock_destroy+0x19b/0x1c0 process_one_work+0x1b3/0x3c0 ? process_one_work+0x3c0/0x3c0 worker_thread+0x30/0x350 ? process_one_work+0x3c0/0x3c0 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Fixes: 9635720b7c88 ("bpf, sockmap: Fix memleak on ingress msg enqueue") Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220304081145.2037182-2-wangyufen@huawei.com --- include/linux/skmsg.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index fdb5375f0562..c5a2d6f50f25 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -304,21 +304,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) -{ - if (msg->skb) - sock_drop(psock->sk, msg->skb); - kfree(msg); -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) list_add_tail(&msg->list, &psock->ingress_msg); - else - drop_sk_msg(psock, msg); + else { + sk_msg_free(psock->sk, msg); + kfree(msg); + } spin_unlock_bh(&psock->ingress_lock); } -- cgit v1.2.3 From 23a9dbbe0faf124fc4c139615633b9d12a3a89ef Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Mar 2022 18:34:06 +0300 Subject: NFSD: prevent integer overflow on 32 bit systems On a 32 bit system, the "len * sizeof(*p)" operation can have an integer overflow. Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b519609af1d0..4417f667c757 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; -- cgit v1.2.3 From b0ae33a2d2fb6c55117b377ec4ae3f2c84eab6a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Mar 2022 16:19:55 +0100 Subject: usb: early: xhci-dbc: Remove duplicate keep parsing The generic earlyprintk= parsing already parses the optional ",keep", no need to duplicate that in the xdbc driver. 
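A rough sketch of the resulting split of responsibilities (simplified, not the actual arch/x86 early_printk code): the generic parser decides "keep" once and hands the result to the xdbc backend through the new parameter:

/* Illustrative only: example_setup_early_printk() stands in for the real
 * earlyprintk= parser; early_xdbc_parse_parameter() is the prototype below. */
static bool early_console_keep;

static void example_setup_early_printk(char *buf)
{
	early_console_keep = strstr(buf, "keep") != NULL;	/* parsed once, here */

	if (!strncmp(buf, "xdbc", 4))
		early_xdbc_parse_parameter(buf + 4, early_console_keep);
}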
Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220304152135.975568860@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/xhci-dbgp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/xhci-dbgp.h b/include/linux/usb/xhci-dbgp.h index 0a37f1283bf0..01fe768873f9 100644 --- a/include/linux/usb/xhci-dbgp.h +++ b/include/linux/usb/xhci-dbgp.h @@ -15,7 +15,7 @@ #define __LINUX_XHCI_DBGP_H #ifdef CONFIG_EARLY_PRINTK_USB_XDBC -int __init early_xdbc_parse_parameter(char *s); +int __init early_xdbc_parse_parameter(char *s, int keep_early); int __init early_xdbc_setup_hardware(void); void __init early_xdbc_register_console(void); #else -- cgit v1.2.3 From ff5812e00d5e7bdeae68784d99686eff67896931 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 8 Mar 2022 18:48:54 +0000 Subject: crypto: hisilicon/qm: Move the QM header to include/linux Since we are going to introduce VFIO PCI HiSilicon ACC driver for live migration in subsequent patches, move the ACC QM header file to a common include dir. Acked-by: Zhou Wang Acked-by: Longfang Liu Acked-by: Kai Ye Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20220308184902.2242-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- include/linux/hisi_acc_qm.h | 441 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 441 insertions(+) create mode 100644 include/linux/hisi_acc_qm.h (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h new file mode 100644 index 000000000000..3068093229a5 --- /dev/null +++ b/include/linux/hisi_acc_qm.h @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 HiSilicon Limited. */ +#ifndef HISI_ACC_QM_H +#define HISI_ACC_QM_H + +#include +#include +#include +#include +#include + +#define QM_QNUM_V1 4096 +#define QM_QNUM_V2 1024 +#define QM_MAX_VFS_NUM_V2 63 + +/* qm user domain */ +#define QM_ARUSER_M_CFG_1 0x100088 +#define AXUSER_SNOOP_ENABLE BIT(30) +#define AXUSER_CMD_TYPE GENMASK(14, 12) +#define AXUSER_CMD_SMMU_NORMAL 1 +#define AXUSER_NS BIT(6) +#define AXUSER_NO BIT(5) +#define AXUSER_FP BIT(4) +#define AXUSER_SSV BIT(0) +#define AXUSER_BASE (AXUSER_SNOOP_ENABLE | \ + FIELD_PREP(AXUSER_CMD_TYPE, \ + AXUSER_CMD_SMMU_NORMAL) | \ + AXUSER_NS | AXUSER_NO | AXUSER_FP) +#define QM_ARUSER_M_CFG_ENABLE 0x100090 +#define ARUSER_M_CFG_ENABLE 0xfffffffe +#define QM_AWUSER_M_CFG_1 0x100098 +#define QM_AWUSER_M_CFG_ENABLE 0x1000a0 +#define AWUSER_M_CFG_ENABLE 0xfffffffe +#define QM_WUSER_M_CFG_ENABLE 0x1000a8 +#define WUSER_M_CFG_ENABLE 0xffffffff + +/* qm cache */ +#define QM_CACHE_CTL 0x100050 +#define SQC_CACHE_ENABLE BIT(0) +#define CQC_CACHE_ENABLE BIT(1) +#define SQC_CACHE_WB_ENABLE BIT(4) +#define SQC_CACHE_WB_THRD GENMASK(10, 5) +#define CQC_CACHE_WB_ENABLE BIT(11) +#define CQC_CACHE_WB_THRD GENMASK(17, 12) +#define QM_AXI_M_CFG 0x1000ac +#define AXI_M_CFG 0xffff +#define QM_AXI_M_CFG_ENABLE 0x1000b0 +#define AM_CFG_SINGLE_PORT_MAX_TRANS 0x300014 +#define AXI_M_CFG_ENABLE 0xffffffff +#define QM_PEH_AXUSER_CFG 0x1000cc +#define QM_PEH_AXUSER_CFG_ENABLE 0x1000d0 +#define PEH_AXUSER_CFG 0x401001 +#define PEH_AXUSER_CFG_ENABLE 0xffffffff + +#define QM_AXI_RRESP BIT(0) +#define QM_AXI_BRESP BIT(1) +#define QM_ECC_MBIT BIT(2) +#define QM_ECC_1BIT BIT(3) +#define QM_ACC_GET_TASK_TIMEOUT BIT(4) +#define QM_ACC_DO_TASK_TIMEOUT BIT(5) +#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6) +#define QM_SQ_CQ_VF_INVALID BIT(7) +#define 
QM_CQ_VF_INVALID BIT(8) +#define QM_SQ_VF_INVALID BIT(9) +#define QM_DB_TIMEOUT BIT(10) +#define QM_OF_FIFO_OF BIT(11) +#define QM_DB_RANDOM_INVALID BIT(12) +#define QM_MAILBOX_TIMEOUT BIT(13) +#define QM_FLR_TIMEOUT BIT(14) + +#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \ + QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \ + QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \ + QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT) +#define QM_BASE_CE QM_ECC_1BIT + +#define QM_Q_DEPTH 1024 +#define QM_MIN_QNUM 2 +#define HISI_ACC_SGL_SGE_NR_MAX 255 +#define QM_SHAPER_CFG 0x100164 +#define QM_SHAPER_ENABLE BIT(30) +#define QM_SHAPER_TYPE1_OFFSET 10 + +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + +/* uacce mode of the driver */ +#define UACCE_MODE_NOUACCE 0 /* don't use uacce */ +#define UACCE_MODE_SVA 1 /* use uacce sva mode */ +#define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" + +enum qm_stop_reason { + QM_NORMAL, + QM_SOFT_RESET, + QM_FLR, +}; + +enum qm_state { + QM_INIT = 0, + QM_START, + QM_CLOSE, + QM_STOP, +}; + +enum qp_state { + QP_INIT = 1, + QP_START, + QP_STOP, + QP_CLOSE, +}; + +enum qm_hw_ver { + QM_HW_UNKNOWN = -1, + QM_HW_V1 = 0x20, + QM_HW_V2 = 0x21, + QM_HW_V3 = 0x30, +}; + +enum qm_fun_type { + QM_HW_PF, + QM_HW_VF, +}; + +enum qm_debug_file { + CURRENT_QM, + CURRENT_Q, + CLEAR_ENABLE, + DEBUG_FILE_NUM, +}; + +struct qm_dfx { + atomic64_t err_irq_cnt; + atomic64_t aeq_irq_cnt; + atomic64_t abnormal_irq_cnt; + atomic64_t create_qp_err_cnt; + atomic64_t mb_err_cnt; +}; + +struct debugfs_file { + enum qm_debug_file index; + struct mutex lock; + struct qm_debug *debug; +}; + +struct qm_debug { + u32 curr_qm_qp_num; + u32 sqe_mask_offset; + u32 sqe_mask_len; + struct qm_dfx dfx; + struct dentry *debug_root; + struct dentry *qm_d; + struct debugfs_file files[DEBUG_FILE_NUM]; +}; + +struct qm_shaper_factor { + u32 func_qos; + u64 cir_b; + u64 cir_u; + u64 cir_s; + u64 cbs_s; +}; + +struct qm_dma { + void *va; + dma_addr_t dma; + size_t size; +}; + +struct hisi_qm_status { + u32 eq_head; + bool eqc_phase; + u32 aeq_head; + bool aeqc_phase; + atomic_t flags; + int stop_reason; +}; + +struct hisi_qm; + +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + u32 ecc_2bits_mask; + u32 dev_ce_mask; + u32 ce; + u32 nfe; + u32 fe; +}; + +struct hisi_qm_err_status { + u32 is_qm_ecc_mbit; + u32 is_dev_ecc_mbit; +}; + +struct hisi_qm_err_ini { + int (*hw_init)(struct hisi_qm *qm); + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts); + void (*open_axi_master_ooo)(struct hisi_qm *qm); + void (*close_axi_master_ooo)(struct hisi_qm *qm); + void (*open_sva_prefetch)(struct hisi_qm *qm); + void (*close_sva_prefetch)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + void (*err_info_init)(struct hisi_qm *qm); +}; + +struct hisi_qm_list { + struct mutex lock; + struct list_head list; + int (*register_to_crypto)(struct hisi_qm *qm); + void (*unregister_from_crypto)(struct hisi_qm *qm); +}; + +struct hisi_qm { + enum qm_hw_ver ver; + enum qm_fun_type fun_type; + const char *dev_name; + struct pci_dev *pdev; + void __iomem *io_base; + void __iomem *db_io_base; + u32 sqe_size; + u32 qp_base; + u32 qp_num; + u32 qp_in_used; + u32 ctrl_qp_num; + u32 max_qp_num; + u32 vfs_num; + u32 db_interval; + struct list_head list; + struct 
hisi_qm_list *qm_list; + + struct qm_dma qdma; + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqe *eqe; + struct qm_aeqe *aeqe; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqe_dma; + dma_addr_t aeqe_dma; + + struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; + struct hisi_qm_err_info err_info; + struct hisi_qm_err_status err_status; + unsigned long misc_ctl; /* driver removing and reset sched */ + + struct rw_semaphore qps_lock; + struct idr qp_idr; + struct hisi_qp *qp_array; + + struct mutex mailbox_lock; + + const struct hisi_qm_hw_ops *ops; + + struct qm_debug debug; + + u32 error_mask; + + struct workqueue_struct *wq; + struct work_struct work; + struct work_struct rst_work; + struct work_struct cmd_process; + + const char *algs; + bool use_sva; + bool is_frozen; + + /* doorbell isolation enable */ + bool use_db_isolation; + resource_size_t phys_base; + resource_size_t db_phys_base; + struct uacce_device *uacce; + int mode; + struct qm_shaper_factor *factor; + u32 mb_qos; + u32 type_rate; +}; + +struct hisi_qp_status { + atomic_t used; + u16 sq_tail; + u16 cq_head; + bool cqc_phase; + atomic_t flags; +}; + +struct hisi_qp_ops { + int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm); +}; + +struct hisi_qp { + u32 qp_id; + u8 alg_type; + u8 req_type; + + struct qm_dma qdma; + void *sqe; + struct qm_cqe *cqe; + dma_addr_t sqe_dma; + dma_addr_t cqe_dma; + + struct hisi_qp_status qp_status; + struct hisi_qp_ops *hw_ops; + void *qp_ctx; + void (*req_cb)(struct hisi_qp *qp, void *data); + void (*event_cb)(struct hisi_qp *qp); + + struct hisi_qm *qm; + bool is_resetting; + bool is_in_kernel; + u16 pasid; + struct uacce_queue *uacce_q; +}; + +static inline int q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device) +{ + struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, + device, NULL); + u32 n, q_num; + int ret; + + if (!val) + return -EINVAL; + + if (!pdev) { + q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); + pr_info("No device found currently, suppose queue number is %u\n", + q_num); + } else { + if (pdev->revision == QM_HW_V1) + q_num = QM_QNUM_V1; + else + q_num = QM_QNUM_V2; + } + + ret = kstrtou32(val, 10, &n); + if (ret || n < QM_MIN_QNUM || n > q_num) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int vfs_num_set(const char *val, const struct kernel_param *kp) +{ + u32 n; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtou32(val, 10, &n); + if (ret < 0) + return ret; + + if (n > QM_MAX_VFS_NUM_V2) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int mode_set(const char *val, const struct kernel_param *kp) +{ + u32 n; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtou32(val, 10, &n); + if (ret != 0 || (n != UACCE_MODE_SVA && + n != UACCE_MODE_NOUACCE)) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int uacce_mode_set(const char *val, const struct kernel_param *kp) +{ + return mode_set(val, kp); +} + +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +int hisi_qm_init(struct hisi_qm *qm); +void hisi_qm_uninit(struct hisi_qm *qm); +int hisi_qm_start(struct hisi_qm *qm); +int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); +struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type); +int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); +int hisi_qm_stop_qp(struct hisi_qp *qp); +void hisi_qm_release_qp(struct 
hisi_qp *qp); +int hisi_qp_send(struct hisi_qp *qp, const void *msg); +int hisi_qm_get_free_qp_num(struct hisi_qm *qm); +int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); +void hisi_qm_debug_init(struct hisi_qm *qm); +enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); +void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); +int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); +int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); +void hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); +pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); +void hisi_qm_reset_prepare(struct pci_dev *pdev); +void hisi_qm_reset_done(struct pci_dev *pdev); + +struct hisi_acc_sgl_pool; +struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, + struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, + u32 index, dma_addr_t *hw_sgl_dma); +void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl, + struct hisi_acc_hw_sgl *hw_sgl); +struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, + u32 count, u32 sge_nr); +void hisi_acc_free_sgl_pool(struct device *dev, + struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); +void hisi_qm_dev_shutdown(struct pci_dev *pdev); +void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_resume(struct device *dev); +int hisi_qm_suspend(struct device *dev); +void hisi_qm_pm_uninit(struct hisi_qm *qm); +void hisi_qm_pm_init(struct hisi_qm *qm); +int hisi_qm_get_dfx_access(struct hisi_qm *qm); +void hisi_qm_put_dfx_access(struct hisi_qm *qm); +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); +#endif -- cgit v1.2.3 From b4b084d7133284e29777ca445e001186259602d4 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Tue, 8 Mar 2022 18:48:55 +0000 Subject: crypto: hisilicon/qm: Move few definitions to common header Move Doorbell and Mailbox definitions to common header file. Also export QM mailbox functions. This will be useful when we introduce VFIO PCI HiSilicon ACC live migration driver. 
Signed-off-by: Longfang Liu Acked-by: Zhou Wang Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20220308184902.2242-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- include/linux/hisi_acc_qm.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 3068093229a5..6a6477c34666 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -34,6 +34,40 @@ #define QM_WUSER_M_CFG_ENABLE 0x1000a8 #define WUSER_M_CFG_ENABLE 0xffffffff +/* mailbox */ +#define QM_MB_CMD_SQC 0x0 +#define QM_MB_CMD_CQC 0x1 +#define QM_MB_CMD_EQC 0x2 +#define QM_MB_CMD_AEQC 0x3 +#define QM_MB_CMD_SQC_BT 0x4 +#define QM_MB_CMD_CQC_BT 0x5 +#define QM_MB_CMD_SQC_VFT_V2 0x6 +#define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_SRC 0xc +#define QM_MB_CMD_DST 0xd + +#define QM_MB_CMD_SEND_BASE 0x300 +#define QM_MB_EVENT_SHIFT 8 +#define QM_MB_BUSY_SHIFT 13 +#define QM_MB_OP_SHIFT 14 +#define QM_MB_CMD_DATA_ADDR_L 0x304 +#define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_MAX_WAIT_CNT 6000 + +/* doorbell */ +#define QM_DOORBELL_CMD_SQ 0 +#define QM_DOORBELL_CMD_CQ 1 +#define QM_DOORBELL_CMD_EQ 2 +#define QM_DOORBELL_CMD_AEQ 3 + +#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 +#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 +#define QM_QP_MAX_NUM_SHIFT 11 +#define QM_DB_CMD_SHIFT_V2 12 +#define QM_DB_RAND_SHIFT_V2 16 +#define QM_DB_INDEX_SHIFT_V2 32 +#define QM_DB_PRIORITY_SHIFT_V2 48 + /* qm cache */ #define QM_CACHE_CTL 0x100050 #define SQC_CACHE_ENABLE BIT(0) @@ -414,6 +448,10 @@ pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); void hisi_qm_reset_prepare(struct pci_dev *pdev); void hisi_qm_reset_done(struct pci_dev *pdev); +int hisi_qm_wait_mb_ready(struct hisi_qm *qm); +int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op); + struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, -- cgit v1.2.3 From fae74feacd2dfb6172c4862fc760f969a991c06f Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 8 Mar 2022 18:48:56 +0000 Subject: hisi_acc_qm: Move VF PCI device IDs to common header Move the PCI Device IDs of HiSilicon ACC VF devices to a common header and also use a uniform naming convention. This will be useful when we introduce the vfio PCI HiSilicon ACC live migration driver in subsequent patches. 
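As a hedged sketch of how a VF-only driver could consume these IDs (the table name is hypothetical; the vendor and device ID macros are the ones added below):

/* Illustrative match table binding only to the HiSilicon ACC VF functions. */
static const struct pci_device_id hisi_acc_vf_example_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
	{ }
};
MODULE_DEVICE_TABLE(pci, hisi_acc_vf_example_ids);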
Cc: Bjorn Helgaas Acked-by: Zhou Wang Acked-by: Longfang Liu Acked-by: Kai Ye Signed-off-by: Shameer Kolothum Acked-by: Bjorn Helgaas # pci_ids.h Link: https://lore.kernel.org/r/20220308184902.2242-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aad54c666407..31dee2b65a62 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2529,6 +2529,9 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff #define PCI_VENDOR_ID_HUAWEI 0x19e5 +#define PCI_DEVICE_ID_HUAWEI_ZIP_VF 0xa251 +#define PCI_DEVICE_ID_HUAWEI_SEC_VF 0xa256 +#define PCI_DEVICE_ID_HUAWEI_HPRE_VF 0xa259 #define PCI_VENDOR_ID_NETRONOME 0x19ee #define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000 -- cgit v1.2.3 From 442fbc099b839551f8576723da22c1269cc695ce Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 8 Mar 2022 18:48:59 +0000 Subject: hisi_acc_vfio_pci: Add helper to retrieve the struct pci_driver struct pci_driver pointer is an input into the pci_iov_get_pf_drvdata(). Introduce helpers to retrieve the ACC PF dev struct pci_driver pointers as we use this in ACC vfio migration driver. Acked-by: Zhou Wang Acked-by: Kai Ye Acked-by: Longfang Liu Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20220308184902.2242-7-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- include/linux/hisi_acc_qm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6a6477c34666..00f2a4db8723 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -476,4 +476,9 @@ void hisi_qm_pm_init(struct hisi_qm *qm); int hisi_qm_get_dfx_access(struct hisi_qm *qm); void hisi_qm_put_dfx_access(struct hisi_qm *qm); void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); + +/* Used by VFIO ACC live migration driver */ +struct pci_driver *hisi_sec_get_pf_driver(void); +struct pci_driver *hisi_hpre_get_pf_driver(void); +struct pci_driver *hisi_zip_get_pf_driver(void); #endif -- cgit v1.2.3 From 1e459b25081d4f939b8a1fb4c71dab0cec8f974a Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Tue, 8 Mar 2022 18:49:00 +0000 Subject: crypto: hisilicon/qm: Set the VF QM state register We use VF QM state register to record the status of the QM configuration state. This will be used in the ACC migration driver to determine whether we can safely save and restore the QM data. 
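A minimal sketch of the idea (assumption: the state word is written through the function's own QM MMIO window at the QM_VF_STATE offset added below; the real call site in the QM driver is not part of this include/-only diff):

/* Illustrative only: mark the QM configuration of this function as ready so
 * a migration driver can check it before saving/restoring QM state. */
static void example_qm_set_vf_state(struct hisi_qm *qm, enum qm_vf_state state)
{
	writel(state, qm->io_base + QM_VF_STATE);
}

/* e.g. example_qm_set_vf_state(qm, QM_READY) once configuration completes. */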
Signed-off-by: Longfang Liu Acked-by: Zhou Wang Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20220308184902.2242-8-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- include/linux/hisi_acc_qm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 00f2a4db8723..177f7b7cd414 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -67,6 +67,7 @@ #define QM_DB_RAND_SHIFT_V2 16 #define QM_DB_INDEX_SHIFT_V2 32 #define QM_DB_PRIORITY_SHIFT_V2 48 +#define QM_VF_STATE 0x60 /* qm cache */ #define QM_CACHE_CTL 0x100050 @@ -162,6 +163,11 @@ enum qm_debug_file { DEBUG_FILE_NUM, }; +enum qm_vf_state { + QM_READY = 0, + QM_NOT_READY, +}; + struct qm_dfx { atomic64_t err_irq_cnt; atomic64_t aeq_irq_cnt; -- cgit v1.2.3 From 8126b1c73108bc691f5643df19071a59a69d0bc6 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 14 Mar 2022 19:59:53 +0100 Subject: pstore: Don't use semaphores in always-atomic-context code pstore_dump() is *always* invoked in atomic context (nowadays in an RCU read-side critical section, before that under a spinlock). It doesn't make sense to try to use semaphores here. This is mostly a revert of commit ea84b580b955 ("pstore: Convert buf_lock to semaphore"), except that two parts aren't restored back exactly as they were: - keep the lock initialization in pstore_register - in efi_pstore_write(), always set the "block" flag to false - omit "is_locked", that was unnecessary since commit 959217c84c27 ("pstore: Actually give up during locking failure") - fix the bailout message The actual problem that the buggy commit was trying to address may have been that the use of preemptible() in efi_pstore_write() was wrong - it only looks at preempt_count() and the state of IRQs, but __rcu_read_lock() doesn't touch either of those under CONFIG_PREEMPT_RCU. (Sidenote: CONFIG_PREEMPT_RCU means that the scheduler can preempt tasks in RCU read-side critical sections, but you're not allowed to actively block/reschedule.) Lockdep probably never caught the problem because it's very rare that you actually hit the contended case, so lockdep always just sees the down_trylock(), not the down_interruptible(), and so it can't tell that there's a problem. 
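To make the atomic-context constraint concrete, a short sketch of the pattern the revert restores (illustrative, not the exact pstore_dump() code): a spinlock taken with interrupts disabled is legal here, while down()/down_interruptible() may sleep and is not:

/* Serialize access to the shared crash buffer from atomic/panic context. */
static void example_dump_record(struct pstore_info *psinfo)
{
	unsigned long flags;

	spin_lock_irqsave(&psinfo->buf_lock, flags);
	/* ... fill psinfo->buf and hand it to the backend's ->write() ... */
	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}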
Fixes: ea84b580b955 ("pstore: Convert buf_lock to semaphore") Cc: stable@vger.kernel.org Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220314185953.2068993-1-jannh@google.com --- include/linux/pstore.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index eb93a54cff31..e97a8188f0fd 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -14,7 +14,7 @@ #include #include #include -#include <linux/semaphore.h> +#include <linux/spinlock.h> #include #include @@ -87,7 +87,7 @@ struct pstore_record { * @owner: module which is responsible for this backend driver * @name: name of the backend driver * - * @buf_lock: semaphore to serialize access to @buf + * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer * @bufsize: size of @buf available for crash dump bytes (must match * smallest number of bytes available for writing to a @@ -178,7 +178,7 @@ struct pstore_info { struct module *owner; const char *name; - struct semaphore buf_lock; + spinlock_t buf_lock; char *buf; size_t bufsize; -- cgit v1.2.3 From 65722ff6181aa52c3d5b0929004af22a3a63e148 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 8 Mar 2022 14:00:50 -0500 Subject: drm/amdkfd: CRIU export dmabuf handles for GTT BOs Export dmabuf handles for GTT BOs so that their contents can be accessed using SDMA during checkpoint/restore. v2: Squash in fix from David to set dmabuf handle to invalid for BOs that cannot be accessed using SDMA during checkpoint/restore. Signed-off-by: David Yat Sin Reviewed-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b40687bf1014..42975e940758 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -33,9 +33,10 @@ * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API * - 1.7 - Checkpoint Restore (CRIU) API + * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 7 +#define KFD_IOCTL_MINOR_VERSION 8 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -195,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args { __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; +#define KFD_INVALID_FD 0xffffffff + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 -- cgit v1.2.3 From 8850c3eae2c7a68ed901432a456ea70bbfef65a4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Feb 2022 10:56:16 +0200 Subject: clk: ti: Drop legacy compatibility clocks for am3 We no longer have users for the compatibility clocks and we can drop them. These are old duplicate clocks for what we are using now.
Cc: devicetree@vger.kernel.org Cc: Rob Herring Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20220203085618.16043-2-tony@atomide.com Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/am3.h | 93 ----------------------------------------- 1 file changed, 93 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/am3.h b/include/dt-bindings/clock/am3.h index 894951541276..dfbad5c87933 100644 --- a/include/dt-bindings/clock/am3.h +++ b/include/dt-bindings/clock/am3.h @@ -8,99 +8,6 @@ #define AM3_CLKCTRL_OFFSET 0x0 #define AM3_CLKCTRL_INDEX(offset) ((offset) - AM3_CLKCTRL_OFFSET) -/* XXX: Compatibility part begin, remove this once compatibility support is no longer needed */ - -/* l4_per clocks */ -#define AM3_L4_PER_CLKCTRL_OFFSET 0x14 -#define AM3_L4_PER_CLKCTRL_INDEX(offset) ((offset) - AM3_L4_PER_CLKCTRL_OFFSET) -#define AM3_CPGMAC0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x14) -#define AM3_LCDC_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x18) -#define AM3_USB_OTG_HS_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x1c) -#define AM3_TPTC0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x24) -#define AM3_EMIF_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x28) -#define AM3_OCMCRAM_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x2c) -#define AM3_GPMC_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x30) -#define AM3_MCASP0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x34) -#define AM3_UART6_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x38) -#define AM3_MMC1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x3c) -#define AM3_ELM_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x40) -#define AM3_I2C3_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x44) -#define AM3_I2C2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x48) -#define AM3_SPI0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x4c) -#define AM3_SPI1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x50) -#define AM3_L4_LS_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x60) -#define AM3_MCASP1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x68) -#define AM3_UART2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x6c) -#define AM3_UART3_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x70) -#define AM3_UART4_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x74) -#define AM3_UART5_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x78) -#define AM3_TIMER7_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x7c) -#define AM3_TIMER2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x80) -#define AM3_TIMER3_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x84) -#define AM3_TIMER4_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x88) -#define AM3_RNG_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x90) -#define AM3_AES_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x94) -#define AM3_SHAM_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xa0) -#define AM3_GPIO2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xac) -#define AM3_GPIO3_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xb0) -#define AM3_GPIO4_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xb4) -#define AM3_TPCC_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xbc) -#define AM3_D_CAN0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xc0) -#define AM3_D_CAN1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xc4) -#define AM3_EPWMSS1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xcc) -#define AM3_EPWMSS0_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xd4) -#define AM3_EPWMSS2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xd8) -#define AM3_L3_INSTR_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xdc) -#define AM3_L3_MAIN_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xe0) -#define AM3_PRUSS_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xe8) -#define AM3_TIMER5_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xec) -#define AM3_TIMER6_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xf0) -#define AM3_MMC2_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xf4) -#define AM3_MMC3_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xf8) -#define AM3_TPTC1_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0xfc) -#define AM3_TPTC2_CLKCTRL 
AM3_L4_PER_CLKCTRL_INDEX(0x100) -#define AM3_SPINLOCK_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x10c) -#define AM3_MAILBOX_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x110) -#define AM3_L4_HS_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x120) -#define AM3_OCPWP_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x130) -#define AM3_CLKDIV32K_CLKCTRL AM3_L4_PER_CLKCTRL_INDEX(0x14c) - -/* l4_wkup clocks */ -#define AM3_L4_WKUP_CLKCTRL_OFFSET 0x4 -#define AM3_L4_WKUP_CLKCTRL_INDEX(offset) ((offset) - AM3_L4_WKUP_CLKCTRL_OFFSET) -#define AM3_CONTROL_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0x4) -#define AM3_GPIO1_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0x8) -#define AM3_L4_WKUP_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xc) -#define AM3_DEBUGSS_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0x14) -#define AM3_WKUP_M3_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xb0) -#define AM3_UART1_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xb4) -#define AM3_I2C1_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xb8) -#define AM3_ADC_TSC_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xbc) -#define AM3_SMARTREFLEX0_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xc0) -#define AM3_TIMER1_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xc4) -#define AM3_SMARTREFLEX1_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xc8) -#define AM3_WD_TIMER2_CLKCTRL AM3_L4_WKUP_CLKCTRL_INDEX(0xd4) - -/* mpu clocks */ -#define AM3_MPU_CLKCTRL_OFFSET 0x4 -#define AM3_MPU_CLKCTRL_INDEX(offset) ((offset) - AM3_MPU_CLKCTRL_OFFSET) -#define AM3_MPU_CLKCTRL AM3_MPU_CLKCTRL_INDEX(0x4) - -/* l4_rtc clocks */ -#define AM3_RTC_CLKCTRL AM3_CLKCTRL_INDEX(0x0) - -/* gfx_l3 clocks */ -#define AM3_GFX_L3_CLKCTRL_OFFSET 0x4 -#define AM3_GFX_L3_CLKCTRL_INDEX(offset) ((offset) - AM3_GFX_L3_CLKCTRL_OFFSET) -#define AM3_GFX_CLKCTRL AM3_GFX_L3_CLKCTRL_INDEX(0x4) - -/* l4_cefuse clocks */ -#define AM3_L4_CEFUSE_CLKCTRL_OFFSET 0x20 -#define AM3_L4_CEFUSE_CLKCTRL_INDEX(offset) ((offset) - AM3_L4_CEFUSE_CLKCTRL_OFFSET) -#define AM3_CEFUSE_CLKCTRL AM3_L4_CEFUSE_CLKCTRL_INDEX(0x20) - -/* XXX: Compatibility part end */ - /* l4ls clocks */ #define AM3_L4LS_CLKCTRL_OFFSET 0x38 #define AM3_L4LS_CLKCTRL_INDEX(offset) ((offset) - AM3_L4LS_CLKCTRL_OFFSET) -- cgit v1.2.3 From e65eb2efc6175392f0e97f515e6e5b179e181cd8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Feb 2022 10:56:17 +0200 Subject: clk: ti: Drop legacy compatibility clocks for am4 We no longer have users for the compatibility clocks and we can drop them. These are old duplicate clocks for what we using. 
Cc: devicetree@vger.kernel.org Cc: Rob Herring Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20220203085618.16043-3-tony@atomide.com Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/am4.h | 98 ----------------------------------------- 1 file changed, 98 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/am4.h b/include/dt-bindings/clock/am4.h index 4be6c5961f34..a65b082e9cff 100644 --- a/include/dt-bindings/clock/am4.h +++ b/include/dt-bindings/clock/am4.h @@ -8,104 +8,6 @@ #define AM4_CLKCTRL_OFFSET 0x20 #define AM4_CLKCTRL_INDEX(offset) ((offset) - AM4_CLKCTRL_OFFSET) -/* XXX: Compatibility part begin, remove this once compatibility support is no longer needed */ - -/* l4_wkup clocks */ -#define AM4_ADC_TSC_CLKCTRL AM4_CLKCTRL_INDEX(0x120) -#define AM4_L4_WKUP_CLKCTRL AM4_CLKCTRL_INDEX(0x220) -#define AM4_WKUP_M3_CLKCTRL AM4_CLKCTRL_INDEX(0x228) -#define AM4_COUNTER_32K_CLKCTRL AM4_CLKCTRL_INDEX(0x230) -#define AM4_TIMER1_CLKCTRL AM4_CLKCTRL_INDEX(0x328) -#define AM4_WD_TIMER2_CLKCTRL AM4_CLKCTRL_INDEX(0x338) -#define AM4_I2C1_CLKCTRL AM4_CLKCTRL_INDEX(0x340) -#define AM4_UART1_CLKCTRL AM4_CLKCTRL_INDEX(0x348) -#define AM4_SMARTREFLEX0_CLKCTRL AM4_CLKCTRL_INDEX(0x350) -#define AM4_SMARTREFLEX1_CLKCTRL AM4_CLKCTRL_INDEX(0x358) -#define AM4_CONTROL_CLKCTRL AM4_CLKCTRL_INDEX(0x360) -#define AM4_GPIO1_CLKCTRL AM4_CLKCTRL_INDEX(0x368) - -/* mpu clocks */ -#define AM4_MPU_CLKCTRL AM4_CLKCTRL_INDEX(0x20) - -/* gfx_l3 clocks */ -#define AM4_GFX_CLKCTRL AM4_CLKCTRL_INDEX(0x20) - -/* l4_rtc clocks */ -#define AM4_RTC_CLKCTRL AM4_CLKCTRL_INDEX(0x20) - -/* l4_per clocks */ -#define AM4_L3_MAIN_CLKCTRL AM4_CLKCTRL_INDEX(0x20) -#define AM4_AES_CLKCTRL AM4_CLKCTRL_INDEX(0x28) -#define AM4_DES_CLKCTRL AM4_CLKCTRL_INDEX(0x30) -#define AM4_L3_INSTR_CLKCTRL AM4_CLKCTRL_INDEX(0x40) -#define AM4_OCMCRAM_CLKCTRL AM4_CLKCTRL_INDEX(0x50) -#define AM4_SHAM_CLKCTRL AM4_CLKCTRL_INDEX(0x58) -#define AM4_VPFE0_CLKCTRL AM4_CLKCTRL_INDEX(0x68) -#define AM4_VPFE1_CLKCTRL AM4_CLKCTRL_INDEX(0x70) -#define AM4_TPCC_CLKCTRL AM4_CLKCTRL_INDEX(0x78) -#define AM4_TPTC0_CLKCTRL AM4_CLKCTRL_INDEX(0x80) -#define AM4_TPTC1_CLKCTRL AM4_CLKCTRL_INDEX(0x88) -#define AM4_TPTC2_CLKCTRL AM4_CLKCTRL_INDEX(0x90) -#define AM4_L4_HS_CLKCTRL AM4_CLKCTRL_INDEX(0xa0) -#define AM4_GPMC_CLKCTRL AM4_CLKCTRL_INDEX(0x220) -#define AM4_MCASP0_CLKCTRL AM4_CLKCTRL_INDEX(0x238) -#define AM4_MCASP1_CLKCTRL AM4_CLKCTRL_INDEX(0x240) -#define AM4_MMC3_CLKCTRL AM4_CLKCTRL_INDEX(0x248) -#define AM4_QSPI_CLKCTRL AM4_CLKCTRL_INDEX(0x258) -#define AM4_USB_OTG_SS0_CLKCTRL AM4_CLKCTRL_INDEX(0x260) -#define AM4_USB_OTG_SS1_CLKCTRL AM4_CLKCTRL_INDEX(0x268) -#define AM4_PRUSS_CLKCTRL AM4_CLKCTRL_INDEX(0x320) -#define AM4_L4_LS_CLKCTRL AM4_CLKCTRL_INDEX(0x420) -#define AM4_D_CAN0_CLKCTRL AM4_CLKCTRL_INDEX(0x428) -#define AM4_D_CAN1_CLKCTRL AM4_CLKCTRL_INDEX(0x430) -#define AM4_EPWMSS0_CLKCTRL AM4_CLKCTRL_INDEX(0x438) -#define AM4_EPWMSS1_CLKCTRL AM4_CLKCTRL_INDEX(0x440) -#define AM4_EPWMSS2_CLKCTRL AM4_CLKCTRL_INDEX(0x448) -#define AM4_EPWMSS3_CLKCTRL AM4_CLKCTRL_INDEX(0x450) -#define AM4_EPWMSS4_CLKCTRL AM4_CLKCTRL_INDEX(0x458) -#define AM4_EPWMSS5_CLKCTRL AM4_CLKCTRL_INDEX(0x460) -#define AM4_ELM_CLKCTRL AM4_CLKCTRL_INDEX(0x468) -#define AM4_GPIO2_CLKCTRL AM4_CLKCTRL_INDEX(0x478) -#define AM4_GPIO3_CLKCTRL AM4_CLKCTRL_INDEX(0x480) -#define AM4_GPIO4_CLKCTRL AM4_CLKCTRL_INDEX(0x488) -#define AM4_GPIO5_CLKCTRL AM4_CLKCTRL_INDEX(0x490) -#define AM4_GPIO6_CLKCTRL AM4_CLKCTRL_INDEX(0x498) -#define 
AM4_HDQ1W_CLKCTRL AM4_CLKCTRL_INDEX(0x4a0) -#define AM4_I2C2_CLKCTRL AM4_CLKCTRL_INDEX(0x4a8) -#define AM4_I2C3_CLKCTRL AM4_CLKCTRL_INDEX(0x4b0) -#define AM4_MAILBOX_CLKCTRL AM4_CLKCTRL_INDEX(0x4b8) -#define AM4_MMC1_CLKCTRL AM4_CLKCTRL_INDEX(0x4c0) -#define AM4_MMC2_CLKCTRL AM4_CLKCTRL_INDEX(0x4c8) -#define AM4_RNG_CLKCTRL AM4_CLKCTRL_INDEX(0x4e0) -#define AM4_SPI0_CLKCTRL AM4_CLKCTRL_INDEX(0x500) -#define AM4_SPI1_CLKCTRL AM4_CLKCTRL_INDEX(0x508) -#define AM4_SPI2_CLKCTRL AM4_CLKCTRL_INDEX(0x510) -#define AM4_SPI3_CLKCTRL AM4_CLKCTRL_INDEX(0x518) -#define AM4_SPI4_CLKCTRL AM4_CLKCTRL_INDEX(0x520) -#define AM4_SPINLOCK_CLKCTRL AM4_CLKCTRL_INDEX(0x528) -#define AM4_TIMER2_CLKCTRL AM4_CLKCTRL_INDEX(0x530) -#define AM4_TIMER3_CLKCTRL AM4_CLKCTRL_INDEX(0x538) -#define AM4_TIMER4_CLKCTRL AM4_CLKCTRL_INDEX(0x540) -#define AM4_TIMER5_CLKCTRL AM4_CLKCTRL_INDEX(0x548) -#define AM4_TIMER6_CLKCTRL AM4_CLKCTRL_INDEX(0x550) -#define AM4_TIMER7_CLKCTRL AM4_CLKCTRL_INDEX(0x558) -#define AM4_TIMER8_CLKCTRL AM4_CLKCTRL_INDEX(0x560) -#define AM4_TIMER9_CLKCTRL AM4_CLKCTRL_INDEX(0x568) -#define AM4_TIMER10_CLKCTRL AM4_CLKCTRL_INDEX(0x570) -#define AM4_TIMER11_CLKCTRL AM4_CLKCTRL_INDEX(0x578) -#define AM4_UART2_CLKCTRL AM4_CLKCTRL_INDEX(0x580) -#define AM4_UART3_CLKCTRL AM4_CLKCTRL_INDEX(0x588) -#define AM4_UART4_CLKCTRL AM4_CLKCTRL_INDEX(0x590) -#define AM4_UART5_CLKCTRL AM4_CLKCTRL_INDEX(0x598) -#define AM4_UART6_CLKCTRL AM4_CLKCTRL_INDEX(0x5a0) -#define AM4_OCP2SCP0_CLKCTRL AM4_CLKCTRL_INDEX(0x5b8) -#define AM4_OCP2SCP1_CLKCTRL AM4_CLKCTRL_INDEX(0x5c0) -#define AM4_EMIF_CLKCTRL AM4_CLKCTRL_INDEX(0x720) -#define AM4_DSS_CORE_CLKCTRL AM4_CLKCTRL_INDEX(0xa20) -#define AM4_CPGMAC0_CLKCTRL AM4_CLKCTRL_INDEX(0xb20) - -/* XXX: Compatibility part end. */ - /* l3s_tsc clocks */ #define AM4_L3S_TSC_CLKCTRL_OFFSET 0x120 #define AM4_L3S_TSC_CLKCTRL_INDEX(offset) ((offset) - AM4_L3S_TSC_CLKCTRL_OFFSET) -- cgit v1.2.3 From 579cdf58b7e50fa0650324667d240ecf45c7d12f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Feb 2022 10:56:18 +0200 Subject: clk: ti: Drop legacy compatibility clocks for dra7 We no longer have users for the compatibility clocks and we can drop them. These are old duplicate clocks for what we using. 
Depends-on: 31aa7056bbec ("ARM: dts: Don't use legacy clock defines for dra7 clkctrl") Depends-on: 9206a3af4fc0 ("clk: ti: Move dra7 clock devices out of the legacy section") Cc: devicetree@vger.kernel.org Cc: Rob Herring Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20220203085618.16043-4-tony@atomide.com Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/dra7.h | 168 --------------------------------------- 1 file changed, 168 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h index 29ff6b895848..8a903c78c5a5 100644 --- a/include/dt-bindings/clock/dra7.h +++ b/include/dt-bindings/clock/dra7.h @@ -8,174 +8,6 @@ #define DRA7_CLKCTRL_OFFSET 0x20 #define DRA7_CLKCTRL_INDEX(offset) ((offset) - DRA7_CLKCTRL_OFFSET) -/* XXX: Compatibility part begin, remove this once compatibility support is no longer needed */ - -/* mpu clocks */ -#define DRA7_MPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - -/* ipu clocks */ -#define _DRA7_IPU_CLKCTRL_OFFSET 0x40 -#define _DRA7_IPU_CLKCTRL_INDEX(offset) ((offset) - _DRA7_IPU_CLKCTRL_OFFSET) -#define DRA7_MCASP1_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x50) -#define DRA7_TIMER5_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x58) -#define DRA7_TIMER6_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x60) -#define DRA7_TIMER7_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x68) -#define DRA7_TIMER8_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x70) -#define DRA7_I2C5_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x78) -#define DRA7_UART6_CLKCTRL _DRA7_IPU_CLKCTRL_INDEX(0x80) - -/* rtc clocks */ -#define DRA7_RTC_CLKCTRL_OFFSET 0x40 -#define DRA7_RTC_CLKCTRL_INDEX(offset) ((offset) - DRA7_RTC_CLKCTRL_OFFSET) -#define DRA7_RTCSS_CLKCTRL DRA7_RTC_CLKCTRL_INDEX(0x44) - -/* vip clocks */ -#define DRA7_VIP1_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_VIP2_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -#define DRA7_VIP3_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) - -/* vpe clocks */ -#define DRA7_VPE_CLKCTRL_OFFSET 0x60 -#define DRA7_VPE_CLKCTRL_INDEX(offset) ((offset) - DRA7_VPE_CLKCTRL_OFFSET) -#define DRA7_VPE_CLKCTRL DRA7_VPE_CLKCTRL_INDEX(0x64) - -/* coreaon clocks */ -#define DRA7_SMARTREFLEX_MPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -#define DRA7_SMARTREFLEX_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x38) - -/* l3main1 clocks */ -#define DRA7_L3_MAIN_1_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_GPMC_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -#define DRA7_TPCC_CLKCTRL DRA7_CLKCTRL_INDEX(0x70) -#define DRA7_TPTC0_CLKCTRL DRA7_CLKCTRL_INDEX(0x78) -#define DRA7_TPTC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x80) -#define DRA7_VCP1_CLKCTRL DRA7_CLKCTRL_INDEX(0x88) -#define DRA7_VCP2_CLKCTRL DRA7_CLKCTRL_INDEX(0x90) - -/* dma clocks */ -#define DRA7_DMA_SYSTEM_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - -/* emif clocks */ -#define DRA7_DMM_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - -/* atl clocks */ -#define DRA7_ATL_CLKCTRL_OFFSET 0x0 -#define DRA7_ATL_CLKCTRL_INDEX(offset) ((offset) - DRA7_ATL_CLKCTRL_OFFSET) -#define DRA7_ATL_CLKCTRL DRA7_ATL_CLKCTRL_INDEX(0x0) - -/* l4cfg clocks */ -#define DRA7_L4_CFG_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_SPINLOCK_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -#define DRA7_MAILBOX1_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -#define DRA7_MAILBOX2_CLKCTRL DRA7_CLKCTRL_INDEX(0x48) -#define DRA7_MAILBOX3_CLKCTRL DRA7_CLKCTRL_INDEX(0x50) -#define DRA7_MAILBOX4_CLKCTRL DRA7_CLKCTRL_INDEX(0x58) -#define DRA7_MAILBOX5_CLKCTRL DRA7_CLKCTRL_INDEX(0x60) -#define DRA7_MAILBOX6_CLKCTRL DRA7_CLKCTRL_INDEX(0x68) -#define DRA7_MAILBOX7_CLKCTRL DRA7_CLKCTRL_INDEX(0x70) -#define DRA7_MAILBOX8_CLKCTRL 
DRA7_CLKCTRL_INDEX(0x78) -#define DRA7_MAILBOX9_CLKCTRL DRA7_CLKCTRL_INDEX(0x80) -#define DRA7_MAILBOX10_CLKCTRL DRA7_CLKCTRL_INDEX(0x88) -#define DRA7_MAILBOX11_CLKCTRL DRA7_CLKCTRL_INDEX(0x90) -#define DRA7_MAILBOX12_CLKCTRL DRA7_CLKCTRL_INDEX(0x98) -#define DRA7_MAILBOX13_CLKCTRL DRA7_CLKCTRL_INDEX(0xa0) - -/* l3instr clocks */ -#define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) - -/* dss clocks */ -#define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) - -/* l3init clocks */ -#define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -#define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -#define DRA7_USB_OTG_SS2_CLKCTRL DRA7_CLKCTRL_INDEX(0x40) -#define DRA7_USB_OTG_SS3_CLKCTRL DRA7_CLKCTRL_INDEX(0x48) -#define DRA7_USB_OTG_SS4_CLKCTRL DRA7_CLKCTRL_INDEX(0x50) -#define DRA7_SATA_CLKCTRL DRA7_CLKCTRL_INDEX(0x88) -#define DRA7_PCIE1_CLKCTRL DRA7_CLKCTRL_INDEX(0xb0) -#define DRA7_PCIE2_CLKCTRL DRA7_CLKCTRL_INDEX(0xb8) -#define DRA7_GMAC_CLKCTRL DRA7_CLKCTRL_INDEX(0xd0) -#define DRA7_OCP2SCP1_CLKCTRL DRA7_CLKCTRL_INDEX(0xe0) -#define DRA7_OCP2SCP3_CLKCTRL DRA7_CLKCTRL_INDEX(0xe8) -#define DRA7_USB_OTG_SS1_CLKCTRL DRA7_CLKCTRL_INDEX(0xf0) - -/* l4per clocks */ -#define _DRA7_L4PER_CLKCTRL_OFFSET 0x0 -#define _DRA7_L4PER_CLKCTRL_INDEX(offset) ((offset) - _DRA7_L4PER_CLKCTRL_OFFSET) -#define DRA7_L4_PER2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xc) -#define DRA7_L4_PER3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x14) -#define DRA7_TIMER10_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x28) -#define DRA7_TIMER11_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x30) -#define DRA7_TIMER2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x38) -#define DRA7_TIMER3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x40) -#define DRA7_TIMER4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x48) -#define DRA7_TIMER9_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x50) -#define DRA7_ELM_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x58) -#define DRA7_GPIO2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x60) -#define DRA7_GPIO3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x68) -#define DRA7_GPIO4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x70) -#define DRA7_GPIO5_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x78) -#define DRA7_GPIO6_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x80) -#define DRA7_HDQ1W_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x88) -#define DRA7_EPWMSS1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x90) -#define DRA7_EPWMSS2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x98) -#define DRA7_I2C1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xa0) -#define DRA7_I2C2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xa8) -#define DRA7_I2C3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xb0) -#define DRA7_I2C4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xb8) -#define DRA7_L4_PER1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xc0) -#define DRA7_EPWMSS0_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xc4) -#define DRA7_TIMER13_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xc8) -#define DRA7_TIMER14_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xd0) -#define DRA7_TIMER15_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xd8) -#define DRA7_MCSPI1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xf0) -#define DRA7_MCSPI2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0xf8) -#define DRA7_MCSPI3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x100) -#define DRA7_MCSPI4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x108) -#define DRA7_GPIO7_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x110) -#define DRA7_GPIO8_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x118) -#define DRA7_MMC3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x120) -#define DRA7_MMC4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x128) -#define DRA7_TIMER16_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x130) -#define 
DRA7_QSPI_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x138) -#define DRA7_UART1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x140) -#define DRA7_UART2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x148) -#define DRA7_UART3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x150) -#define DRA7_UART4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x158) -#define DRA7_MCASP2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x160) -#define DRA7_MCASP3_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x168) -#define DRA7_UART5_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x170) -#define DRA7_MCASP5_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x178) -#define DRA7_MCASP8_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x190) -#define DRA7_MCASP4_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x198) -#define DRA7_AES1_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1a0) -#define DRA7_AES2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1a8) -#define DRA7_DES_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1b0) -#define DRA7_RNG_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1c0) -#define DRA7_SHAM_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1c8) -#define DRA7_UART7_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1d0) -#define DRA7_UART8_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1e0) -#define DRA7_UART9_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1e8) -#define DRA7_DCAN2_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x1f0) -#define DRA7_MCASP6_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x204) -#define DRA7_MCASP7_CLKCTRL _DRA7_L4PER_CLKCTRL_INDEX(0x208) - -/* wkupaon clocks */ -#define DRA7_L4_WKUP_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_WD_TIMER2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -#define DRA7_GPIO1_CLKCTRL DRA7_CLKCTRL_INDEX(0x38) -#define DRA7_TIMER1_CLKCTRL DRA7_CLKCTRL_INDEX(0x40) -#define DRA7_TIMER12_CLKCTRL DRA7_CLKCTRL_INDEX(0x48) -#define DRA7_COUNTER_32K_CLKCTRL DRA7_CLKCTRL_INDEX(0x50) -#define DRA7_UART10_CLKCTRL DRA7_CLKCTRL_INDEX(0x80) -#define DRA7_DCAN1_CLKCTRL DRA7_CLKCTRL_INDEX(0x88) -#define DRA7_ADC_CLKCTRL DRA7_CLKCTRL_INDEX(0xa0) - -/* XXX: Compatibility part end. */ - /* mpu clocks */ #define DRA7_MPU_MPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -- cgit v1.2.3 From 08f4c42abad1b93914a93f9042e2443593bd3137 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:51 +0530 Subject: ext4: add new trace event in ext4_fc_cleanup This adds a new trace event in ext4_fc_cleanup() which is helpful in debugging some fast_commit issues. 
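The hook itself lives in fs/ext4/fast_commit.c and is outside this include/-only diff; roughly, the cleanup path emits the event as sketched here, after which it can be enabled through the usual tracefs knob:

/* Sketch of the call site, using the names from the TP_PROTO below. */
trace_ext4_fc_cleanup(journal, full, tid);

/* Enable at runtime via:
 *   /sys/kernel/tracing/events/ext4/ext4_fc_cleanup/enable
 */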
Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/794cdb1d5d3622f3f80d30c222ff6652ea68c375.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 3d48fcb62987..20654aaf9495 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2863,6 +2863,32 @@ TRACE_EVENT(ext4_fc_track_range, __entry->end) ); +TRACE_EVENT(ext4_fc_cleanup, + TP_PROTO(journal_t *journal, int full, tid_t tid), + + TP_ARGS(journal, full, tid), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, j_fc_off) + __field(int, full) + __field(tid_t, tid) + ), + + TP_fast_assign( + struct super_block *sb = journal->j_private; + + __entry->dev = sb->s_dev; + __entry->j_fc_off = journal->j_fc_off; + __entry->full = full; + __entry->tid = tid; + ), + + TP_printk("dev %d,%d, j_fc_off %d, full %d, tid %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->j_fc_off, __entry->full, __entry->tid) + ); + TRACE_EVENT(ext4_update_sb, TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, unsigned int flags), -- cgit v1.2.3 From 1d2e2440c5191da82b1191298909283a58f0ece8 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:52 +0530 Subject: ext4: add transaction tid info in fc_track events This patch adds the transaction & inode tid info in trace events for callers of ext4_fc_track_template(). This is helpful in debugging race conditions where an inode could belong to two different transaction tids. It also fixes the checkpatch warnings which says use tabs instead of spaces. Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/c203c09dc11bb372803c430f621f25a4b8c2c8b4.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 113 ++++++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 20654aaf9495..6bd90df07b5c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2782,32 +2782,41 @@ TRACE_EVENT(ext4_fc_stats, DECLARE_EVENT_CLASS(ext4_fc_track_dentry, - TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), + TP_PROTO(handle_t *handle, struct inode *inode, + struct dentry *dentry, int ret), - TP_ARGS(inode, dentry, ret), + TP_ARGS(handle, inode, dentry, ret), TP_STRUCT__entry( __field(dev_t, dev) + __field(tid_t, t_tid) __field(ino_t, i_ino) + __field(tid_t, i_sync_tid) __field(int, error) ), TP_fast_assign( + struct ext4_inode_info *ei = EXT4_I(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->t_tid = handle->h_transaction->t_tid; __entry->i_ino = inode->i_ino; + __entry->i_sync_tid = ei->i_sync_tid; __entry->error = ret; ), - TP_printk("dev %d,%d, ino %lu, error %d", + TP_printk("dev %d,%d, t_tid %u, ino %lu, i_sync_tid %u, error %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->i_ino, __entry->error + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, + __entry->error ) ); #define DEFINE_EVENT_CLASS_DENTRY(__type) \ DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type, \ - TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ - TP_ARGS(inode, dentry, ret) \ + TP_PROTO(handle_t *handle, struct inode *inode, \ + struct dentry *dentry, int ret), \ + TP_ARGS(handle, inode, dentry, ret) \ ) DEFINE_EVENT_CLASS_DENTRY(create); @@ 
-2815,52 +2824,66 @@ DEFINE_EVENT_CLASS_DENTRY(link); DEFINE_EVENT_CLASS_DENTRY(unlink); TRACE_EVENT(ext4_fc_track_inode, - TP_PROTO(struct inode *inode, int ret), + TP_PROTO(handle_t *handle, struct inode *inode, int ret), - TP_ARGS(inode, ret), + TP_ARGS(handle, inode, ret), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, ino) - __field(int, error) - ), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(tid_t, t_tid) + __field(ino_t, i_ino) + __field(tid_t, i_sync_tid) + __field(int, error) + ), - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->error = ret; - ), + TP_fast_assign( + struct ext4_inode_info *ei = EXT4_I(inode); - TP_printk("dev %d:%d, inode %d, error %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, __entry->error) + __entry->dev = inode->i_sb->s_dev; + __entry->t_tid = handle->h_transaction->t_tid; + __entry->i_ino = inode->i_ino; + __entry->i_sync_tid = ei->i_sync_tid; + __entry->error = ret; + ), + + TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, + __entry->error) ); TRACE_EVENT(ext4_fc_track_range, - TP_PROTO(struct inode *inode, long start, long end, int ret), - - TP_ARGS(inode, start, end, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, ino) - __field(long, start) - __field(long, end) - __field(int, error) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->start = start; - __entry->end = end; - __entry->error = ret; - ), - - TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, __entry->error, __entry->start, - __entry->end) + TP_PROTO(handle_t *handle, struct inode *inode, + long start, long end, int ret), + + TP_ARGS(handle, inode, start, end, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(tid_t, t_tid) + __field(ino_t, i_ino) + __field(tid_t, i_sync_tid) + __field(long, start) + __field(long, end) + __field(int, error) + ), + + TP_fast_assign( + struct ext4_inode_info *ei = EXT4_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->t_tid = handle->h_transaction->t_tid; + __entry->i_ino = inode->i_ino; + __entry->i_sync_tid = ei->i_sync_tid; + __entry->start = start; + __entry->end = end; + __entry->error = ret; + ), + + TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d, start %ld, end %ld", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->t_tid, __entry->i_ino, __entry->i_sync_tid, + __entry->error, __entry->start, __entry->end) ); TRACE_EVENT(ext4_fc_cleanup, -- cgit v1.2.3 From 5641ace54471cb5c393e71f33232088602455c6b Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:54 +0530 Subject: ext4: add commit tid info in ext4_fc_commit_start/stop trace events This adds commit_tid info in ext4_fc_commit_start/stop which is helpful in debugging fast_commit issues. For e.g. issues where due to jbd2 journal full commit, FC miss to commit updates to a file. Also improves TP_prink format string i.e. all ext4 and jbd2 trace events starts with "dev MAjOR,MINOR". Let's follow the same convention while we are still at it. 
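A sketch of the updated call sites (they live in fs/ext4/fast_commit.c, outside this include/-only diff), passing the tid of the transaction being committed to both events:

/* Illustrative: the fast-commit path hands the same commit tid to both
 * events, so the start and stop records can be correlated in the trace. */
trace_ext4_fc_commit_start(sb, commit_tid);
/* ... write the fast-commit blocks ... */
trace_ext4_fc_commit_stop(sb, nblks, reason, commit_tid);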
Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/ebcd6b9ab5b718db30f90854497886801ce38c63.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6bd90df07b5c..aad97376e032 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2686,26 +2686,29 @@ TRACE_EVENT(ext4_fc_replay, ); TRACE_EVENT(ext4_fc_commit_start, - TP_PROTO(struct super_block *sb), + TP_PROTO(struct super_block *sb, tid_t commit_tid), - TP_ARGS(sb), + TP_ARGS(sb, commit_tid), TP_STRUCT__entry( __field(dev_t, dev) + __field(tid_t, tid) ), TP_fast_assign( __entry->dev = sb->s_dev; + __entry->tid = commit_tid; ), - TP_printk("fast_commit started on dev %d,%d", - MAJOR(__entry->dev), MINOR(__entry->dev)) + TP_printk("dev %d,%d tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tid) ); TRACE_EVENT(ext4_fc_commit_stop, - TP_PROTO(struct super_block *sb, int nblks, int reason), + TP_PROTO(struct super_block *sb, int nblks, int reason, + tid_t commit_tid), - TP_ARGS(sb, nblks, reason), + TP_ARGS(sb, nblks, reason, commit_tid), TP_STRUCT__entry( __field(dev_t, dev) @@ -2714,6 +2717,7 @@ TRACE_EVENT(ext4_fc_commit_stop, __field(int, num_fc) __field(int, num_fc_ineligible) __field(int, nblks_agg) + __field(tid_t, tid) ), TP_fast_assign( @@ -2724,12 +2728,13 @@ TRACE_EVENT(ext4_fc_commit_stop, __entry->num_fc_ineligible = EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks; + __entry->tid = commit_tid; ), - TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d", + TP_printk("dev %d,%d nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d, tid %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nblks, __entry->reason, __entry->num_fc, - __entry->num_fc_ineligible, __entry->nblks_agg) + __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid) ); #define FC_REASON_NAME_STAT(reason) \ -- cgit v1.2.3 From 163f11b8b3489fbf04be8d8d0e029f1dcd6546d1 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sat, 12 Mar 2022 11:09:55 +0530 Subject: ext4: fix remaining two trace events to use same printk convention All ext4 & jbd2 trace events starts with "dev Major:Minor". While we are still improving/adding the ftrace events for FC, let's fix last two remaining trace events to follow the same convention. 
Signed-off-by: Ritesh Harjani Reviewed-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/8f33b163f0f29df2491c03b79f8ac96890ea5184.1647057583.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index aad97376e032..ce2b3ad0ee86 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2654,7 +2654,7 @@ TRACE_EVENT(ext4_fc_replay_scan, __entry->off = off; ), - TP_printk("FC scan pass on dev %d,%d: error %d, off %d", + TP_printk("dev %d,%d error %d, off %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->error, __entry->off) ); @@ -2680,7 +2680,7 @@ TRACE_EVENT(ext4_fc_replay, __entry->priv2 = priv2; ), - TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d", + TP_printk("dev %d,%d: tag %d, ino %d, data1 %d, data2 %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tag, __entry->ino, __entry->priv1, __entry->priv2) ); -- cgit v1.2.3 From 0493692b40d02a74dfb6823754f31951ace2000b Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 4 Mar 2022 18:03:18 +0800 Subject: dt-bindings: change the macro name of prci in header files and example We currently change the macro name for fu540 and fu740 by adding the prefix respectively, the dt-bindings should be modified as well. Signed-off-by: Zong Li Acked-by: Rob Herring Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/f9284873c2993a9952d9fe4f8dd5e89f20daab75.1646388139.git.zong.li@sifive.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/sifive-fu540-prci.h | 8 ++++---- include/dt-bindings/clock/sifive-fu740-prci.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h index 3b21d0522c91..5af372e8385f 100644 --- a/include/dt-bindings/clock/sifive-fu540-prci.h +++ b/include/dt-bindings/clock/sifive-fu540-prci.h @@ -10,9 +10,9 @@ /* Clock indexes for use by Device Tree data and the PRCI driver */ -#define PRCI_CLK_COREPLL 0 -#define PRCI_CLK_DDRPLL 1 -#define PRCI_CLK_GEMGXLPLL 2 -#define PRCI_CLK_TLCLK 3 +#define FU540_PRCI_CLK_COREPLL 0 +#define FU540_PRCI_CLK_DDRPLL 1 +#define FU540_PRCI_CLK_GEMGXLPLL 2 +#define FU540_PRCI_CLK_TLCLK 3 #endif diff --git a/include/dt-bindings/clock/sifive-fu740-prci.h b/include/dt-bindings/clock/sifive-fu740-prci.h index 7899b7fee7db..672bdadbf6c0 100644 --- a/include/dt-bindings/clock/sifive-fu740-prci.h +++ b/include/dt-bindings/clock/sifive-fu740-prci.h @@ -11,14 +11,14 @@ /* Clock indexes for use by Device Tree data and the PRCI driver */ -#define PRCI_CLK_COREPLL 0 -#define PRCI_CLK_DDRPLL 1 -#define PRCI_CLK_GEMGXLPLL 2 -#define PRCI_CLK_DVFSCOREPLL 3 -#define PRCI_CLK_HFPCLKPLL 4 -#define PRCI_CLK_CLTXPLL 5 -#define PRCI_CLK_TLCLK 6 -#define PRCI_CLK_PCLK 7 -#define PRCI_CLK_PCIE_AUX 8 +#define FU740_PRCI_CLK_COREPLL 0 +#define FU740_PRCI_CLK_DDRPLL 1 +#define FU740_PRCI_CLK_GEMGXLPLL 2 +#define FU740_PRCI_CLK_DVFSCOREPLL 3 +#define FU740_PRCI_CLK_HFPCLKPLL 4 +#define FU740_PRCI_CLK_CLTXPLL 5 +#define FU740_PRCI_CLK_TLCLK 6 +#define FU740_PRCI_CLK_PCLK 7 +#define FU740_PRCI_CLK_PCIE_AUX 8 #endif /* __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H */ -- cgit v1.2.3 From b0f212633b31ddca99c76aa38f812fe492e8410a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 11 Mar 2022 11:34:28 +0100 Subject: 
xen/grant-table: remove gnttab_*transfer*() functions All grant table operations related to the "transfer" functionality are unused currently. There have been users in the old days of the "Xen-o-Linux" kernel, but those didn't make it upstream. So remove the "transfer" related functions. Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20220311103429.12845-2-jgross@suse.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- include/xen/grant_table.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index c9fea9389ebe..9f9b1a297f0d 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -125,11 +125,6 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, */ int gnttab_try_end_foreign_access(grant_ref_t ref); -int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); - -unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); -unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); - /* * operations on reserved batches of grant references */ @@ -162,9 +157,6 @@ static inline void gnttab_page_grant_foreign_access_ref_one( readonly); } -void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, - unsigned long pfn); - static inline void gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, uint32_t flags, grant_ref_t ref, domid_t domid) -- cgit v1.2.3 From c94b731da21f10086a9e52d63c21c730e3f6c939 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 11 Mar 2022 11:34:29 +0100 Subject: xen/grant-table: remove readonly parameter from functions The gnttab_end_foreign_access() family of functions is taking a "readonly" parameter, which isn't used. Remove it from the function parameters. Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20220311103429.12845-3-jgross@suse.com Reviewed-by: Jan Beulich Acked-by: Christian Schoenebeck Signed-off-by: Boris Ostrovsky --- include/xen/grant_table.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 9f9b1a297f0d..dfd5bf31cfb9 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -97,7 +97,7 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, * longer in use. Return 1 if the grant entry was freed, 0 if it is still in * use. */ -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); +int gnttab_end_foreign_access_ref(grant_ref_t ref); /* * Eventually end access through the given grant reference, and once that @@ -114,8 +114,7 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing * via free_pages_exact()) in order to avoid high order pages. */ -void gnttab_end_foreign_access(grant_ref_t ref, int readonly, - unsigned long page); +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page); /* * End access through the given grant reference, iff the grant entry is -- cgit v1.2.3 From 40867d74c374b235e14d839f3a77f26684feefe5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Mar 2022 14:45:51 -0600 Subject: net: Add l3mdev index to flow struct and avoid oif reset for port devices The fundamental premise of VRF and l3mdev core code is binding a socket to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. Legacy code resets flowi_oif to the l3mdev losing any original port device binding. 
Ben (among others) has demonstrated use cases where the original port device binding is important and needs to be retained. This patch handles that by adding a new entry to the common flow struct that can indicate the l3mdev index for later rule and table matching avoiding the need to reset flowi_oif. In addition to allowing more use cases that require port device binds, this patch brings a few datapath simplications: 1. l3mdev_fib_rule_match is only called when walking fib rules and always after l3mdev_update_flow. That allows an optimization to bail early for non-VRF type uses cases when flowi_l3mdev is not set. Also, only that index needs to be checked for the FIB table id. 2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev (e.g., VRF) device. By resetting flowi_oif only for this case the FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, removing several checks in the datapath. The flowi_iif path can be simplified to only be called if the it is not loopback (loopback can not be assigned to an L3 domain) and the l3mdev index is not already set. 3. Avoid another device lookup in the output path when the fib lookup returns a reject failure. Note: 2 functional tests for local traffic with reject fib rules are updated to reflect the new direct failure at FIB lookup time for ping rather than the failure on packet path. The current code fails like this: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: Warning: source address might be selected on device other than: eth1 PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. --- 172.16.3.1 ping statistics --- 1 packets transmitted, 0 received, 100% packet loss, time 0ms where the test now directly fails: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: connect: No route to host Signed-off-by: David Ahern Tested-by: Ben Greear Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- include/net/flow.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 58beb16a49b8..987bd511d652 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -29,6 +29,7 @@ struct flowi_tunnel { struct flowi_common { int flowic_oif; int flowic_iif; + int flowic_l3mdev; __u32 flowic_mark; __u8 flowic_tos; __u8 flowic_scope; @@ -36,7 +37,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; struct flowi_tunnel flowic_tun_key; @@ -70,6 +70,7 @@ struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif #define flowi4_iif __fl_common.flowic_iif +#define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_tos __fl_common.flowic_tos #define flowi4_scope __fl_common.flowic_scope @@ -102,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; @@ -132,6 +134,7 @@ struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif +#define flowi6_l3mdev __fl_common.flowic_l3mdev 
#define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto @@ -177,6 +180,7 @@ struct flowi { } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif +#define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_tos u.__fl_common.flowic_tos #define flowi_scope u.__fl_common.flowic_scope -- cgit v1.2.3 From 0492d857636e1c52cd71594a723c4b26a7b31978 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 Mar 2022 11:19:43 +0100 Subject: netfilter: flowtable: Fix QinQ and pppoe support for inet table nf_flow_offload_inet_hook() does not check for 802.1q and PPPoE. Fetch inner ethertype from these encapsulation protocols. Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Fixes: 4cd91f7c290f ("netfilter: flowtable: add vlan support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index bd59e950f4d6..64daafd1fc41 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include struct nf_flowtable; struct nf_flow_rule; @@ -317,4 +319,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +{ + __be16 proto; + + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); + switch (proto) { + case htons(PPP_IP): + return htons(ETH_P_IP); + case htons(PPP_IPV6): + return htons(ETH_P_IPV6); + } + + return 0; +} + #endif /* _NF_FLOW_TABLE_H */ -- cgit v1.2.3 From 052ebf1fbb1cab86b145a68d80219c8c57321cbd Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Wed, 16 Mar 2022 02:52:04 -0700 Subject: io_uring: make tracing format consistent Make the tracing formatting for user_data and flags consistent. Having consistent formatting allows one for example to grep for a specific user_data/flags and be able to trace a single sqe through easily. Change user_data to 0x%llx and flags to 0x%x everywhere. The '0x' is useful to disambiguate for example "user_data 100". Additionally remove the '=' for flags in io_uring_req_failed, again for consistency. 
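Circling back to the nf_flow_pppoe_proto() helper added by the flowtable fix above: the hook change itself lives outside include/ and is not shown in this log, but a simplified sketch of how the inet hook could dispatch on the inner ethertype (VLAN handling and error paths omitted, local names made up) might be:

  __be16 proto = skb->protocol;

  if (proto == htons(ETH_P_PPP_SES))
          proto = nf_flow_pppoe_proto(skb);

  switch (proto) {
  case htons(ETH_P_IP):
          return nf_flow_offload_ip_hook(priv, skb, state);
  case htons(ETH_P_IPV6):
          return nf_flow_offload_ipv6_hook(priv, skb, state);
  }
  return NF_ACCEPT;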
Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220316095204.2191498-1-dylany@fb.com Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 18d4341c581c..cddf5b6fbeb4 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -44,7 +44,7 @@ TRACE_EVENT(io_uring_create, __entry->flags = flags; ), - TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d", + TP_printk("ring %p, fd %d sq size %d, cq size %d, flags 0x%x", __entry->ctx, __entry->fd, __entry->sq_entries, __entry->cq_entries, __entry->flags) ); @@ -125,7 +125,7 @@ TRACE_EVENT(io_uring_file_get, __entry->fd = fd; ), - TP_printk("ring %p, req %p, user_data %llu, fd %d", + TP_printk("ring %p, req %p, user_data 0x%llx, fd %d", __entry->ctx, __entry->req, __entry->user_data, __entry->fd) ); @@ -169,7 +169,7 @@ TRACE_EVENT(io_uring_queue_async_work, __entry->rw = rw; ), - TP_printk("ring %p, request %p, user_data %llu, opcode %d, flags %d, %s queue, work %p", + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p", __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); @@ -205,7 +205,7 @@ TRACE_EVENT(io_uring_defer, __entry->opcode = opcode; ), - TP_printk("ring %p, request %p, user_data %llu, opcode %d", + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d", __entry->ctx, __entry->req, __entry->data, __entry->opcode) ); @@ -305,7 +305,7 @@ TRACE_EVENT(io_uring_fail_link, __entry->link = link; ), - TP_printk("ring %p, request %p, user_data %llu, opcode %d, link %p", + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p", __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->link) ); @@ -342,9 +342,9 @@ TRACE_EVENT(io_uring_complete, __entry->cflags = cflags; ), - TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags %x", + TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x", __entry->ctx, __entry->req, - (unsigned long long)__entry->user_data, + __entry->user_data, __entry->res, __entry->cflags) ); @@ -389,7 +389,7 @@ TRACE_EVENT(io_uring_submit_sqe, __entry->sq_thread = sq_thread; ), - TP_printk("ring %p, req %p, user_data %llu, opcode %d, flags %u, " + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, " "non block %d, sq_thread %d", __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->flags, __entry->force_nonblock, __entry->sq_thread) @@ -433,7 +433,7 @@ TRACE_EVENT(io_uring_poll_arm, __entry->events = events; ), - TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask 0x%x, events 0x%x", + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x", __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->mask, __entry->events) ); @@ -470,7 +470,7 @@ TRACE_EVENT(io_uring_task_add, __entry->mask = mask; ), - TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask %x", + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x", __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->mask) ); @@ -529,8 +529,8 @@ TRACE_EVENT(io_uring_req_failed, __entry->error = error; ), - TP_printk("ring %p, req %p, user_data %llu, " - "op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, " + TP_printk("ring %p, req %p, 
user_data 0x%llx, " + "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, " "len=%u, rw_flags=0x%x, buf_index=%d, " "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", __entry->ctx, __entry->req, __entry->user_data, -- cgit v1.2.3 From e621900ad28b748e058b81d6078a5d5eb37b3973 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:12 +0000 Subject: fs: Convert __set_page_dirty_buffers to block_dirty_folio Convert all callers; mostly this is just changing the aops to point at it, but a few implementations need a little more work. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9ee9d003d736..bcb4fe9b8575 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -397,7 +397,7 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } -extern int __set_page_dirty_buffers(struct page *page); +bool block_dirty_folio(struct address_space *mapping, struct folio *folio); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 46de8b979492e1377947700ecb1e3169088668b2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:13 +0000 Subject: fs: Convert __set_page_dirty_no_writeback to noop_dirty_folio This is a mechanical change. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6a9617e9c6bc..ab85d2856a37 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -919,7 +919,7 @@ static inline int __must_check write_one_page(struct page *page) } int __set_page_dirty_nobuffers(struct page *page); -int __set_page_dirty_no_writeback(struct page *page); +bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); void page_endio(struct page *page, bool is_write, int err); -- cgit v1.2.3 From 3a3bae50af5d73fab5da20484029de77ca67bb2e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:15 +0000 Subject: fs: Remove aops ->set_page_dirty With all implementations converted to ->dirty_folio, we can stop calling this fallback method and remove it entirely. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c3d5db8851ae..b472d78f00b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,7 @@ struct address_space_operations { /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); - /* Set a page dirty. Return true if this dirtied it */ - int (*set_page_dirty)(struct page *page); + /* Mark a folio dirty. 
Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); /* -- cgit v1.2.3 From f6c46b1d62f8ffbf2cf6eb43ab0d277bd3f7e948 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 11 Mar 2022 19:20:17 +0000 Subject: PM: hibernate: Honour ACPI hardware signature by default for virtual guests The ACPI specification says that OSPM should refuse to restore from hibernate if the hardware signature changes, and should boot from scratch. However, real BIOSes often vary the hardware signature in cases where we *do* want to resume from hibernate, so Linux doesn't follow the spec by default. However, in a virtual environment there's no reason for the VMM to vary the hardware signature *unless* it wants to trigger a clean reboot as defined by the ACPI spec. So enable the check by default if a hypervisor is detected. Signed-off-by: David Woodhouse Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..766dbcb82df1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_check_s4_hw_signature(int check); +extern int acpi_check_s4_hw_signature; #endif #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From 2cb7b4890d6e7f20560dc251e7f8d3cc68b0d554 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Mar 2022 23:00:04 -0700 Subject: devlink: expose instance locking and add locked port registering It should be familiar and beneficial to expose devlink instance lock to the drivers. This way drivers can block devlink from calling them during critical sections without breakneck locking. Add port helpers, port splitting callbacks will be the first target. Use 'devl_' prefix for "explicitly locked" API. Initial RFC used '__devlink' but that's too much typing. devl_lock_is_held() is not defined without lockdep, which is the same behavior as lockdep_is_held() itself. Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d5349d2fb68..9de0d091aee9 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1479,6 +1479,17 @@ void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +/* Devlink instance explicit locking */ +void devl_lock(struct devlink *devlink); +void devl_unlock(struct devlink *devlink); +void devl_assert_locked(struct devlink *devlink); +bool devl_lock_is_held(struct devlink *devlink); + +int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index); +void devl_port_unregister(struct devlink_port *devlink_port); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); -- cgit v1.2.3 From 706217c1ceb516c96283a1557a31fe003d0c8052 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Mar 2022 23:00:09 -0700 Subject: devlink: pass devlink_port to port_split / port_unsplit callbacks Now that devlink ports are protected by the instance lock it seems natural to pass devlink_port as an argument to the port_split / port_unsplit callbacks. This should save the drivers from doing a lookup. 
In theory drivers may have supported unsplitting ports which were not registered prior to this change. Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 9de0d091aee9..fd89a17adea1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1197,9 +1197,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, unsigned int port_index, + int (*port_split)(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, unsigned int port_index, + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, -- cgit v1.2.3 From d2a3b7c5becc3992f8e7d2b9bf5eacceeedb9a48 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 9 Mar 2022 20:33:20 +0800 Subject: bpf: Fix net.core.bpf_jit_harden race It is the bpf_jit_harden counterpart to commit 60b58afc96c9 ("bpf: fix net.core.bpf_jit_enable race"). bpf_jit_harden will be tested twice for each subprog if there are subprogs in bpf program and constant blinding may increase the length of program, so when running "./test_progs -t subprogs" and toggling bpf_jit_harden between 0 and 2, jit_subprogs may fail because constant blinding increases the length of subprog instructions during extra passs. So cache the value of bpf_jit_blinding_enabled() during program allocation, and use the cached value during constant blinding, subprog JITing and args tracking of tail call. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220309123321.2400262-4-houtao1@huawei.com --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 05ed9bd31b45..ed0c0ff42ad5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -566,6 +566,7 @@ struct bpf_prog { gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ + blinding_requested:1, /* needs constant blinding */ blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ -- cgit v1.2.3 From 4ee06de7729d795773145692e246a06448b1eb7a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 15 Mar 2022 10:20:08 +0100 Subject: net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit() This kind of interface doesn't have a mac header. This patch fixes bpf_redirect() to a PIM interface. 
Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220315092008.31423-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; -- cgit v1.2.3 From ab95465cde2337108252cdf01f064abdc1a67f6c Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 15 Mar 2022 13:02:09 +0200 Subject: net/sched: add vlan push_eth and pop_eth action to the hardware IR Add vlan push_eth and pop_eth action to the hardware intermediate representation model which would subsequently allow it to be used by drivers for offload. Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 6 ++++++ include/net/tc_act/tc_vlan.h | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 92267d23083e..021778a7e1af 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -150,6 +150,8 @@ enum flow_action_id { FLOW_ACTION_PPPOE_PUSH, FLOW_ACTION_JUMP, FLOW_ACTION_PIPE, + FLOW_ACTION_VLAN_PUSH_ETH, + FLOW_ACTION_VLAN_POP_ETH, NUM_FLOW_ACTIONS, }; @@ -211,6 +213,10 @@ struct flow_action_entry { __be16 proto; u8 prio; } vlan; + struct { /* FLOW_ACTION_VLAN_PUSH_ETH */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; + } vlan_push_eth; struct { /* FLOW_ACTION_MANGLE */ /* FLOW_ACTION_ADD */ enum flow_action_mangle_base htype; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index f94b8bc26f9e..a97600f742de 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -78,4 +78,14 @@ static inline u8 tcf_vlan_push_prio(const struct tc_action *a) return tcfv_push_prio; } + +static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest, + const struct tc_action *a) +{ + rcu_read_lock(); + memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN); + memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); + rcu_read_unlock(); +} + #endif /* __NET_TC_VLAN_H */ -- cgit v1.2.3 From 435fe1c0c1f74b682dba85641406abf4337aade6 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Wed, 16 Mar 2022 08:15:57 +0200 Subject: net: geneve: support IPv4/IPv6 as inner protocol This patch adds support for encapsulating IPv4/IPv6 within GENEVE. In order to use this, a new IFLA_GENEVE_INNER_PROTO_INHERIT flag needs to be provided at device creation. This property cannot be changed for the time being. In case IP traffic is received on a non-tun device the drop count is increased. 
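The driver side of this change is outside include/, so as a hedged sketch only: the new attribute is a flag, and a geneve newlink/fill_info pair could plausibly handle it as below (the cfg field name is a guess; only the IFLA_GENEVE_INNER_PROTO_INHERIT constant comes from this patch).

  /* changelink/newlink side: presence of the flag turns the mode on */
  if (data[IFLA_GENEVE_INNER_PROTO_INHERIT])
          cfg->inner_proto_inherit = true;

  /* fill_info side: report it back to user space */
  if (cfg->inner_proto_inherit &&
      nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
          goto nla_put_failure;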
Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20220316061557.431872-1-eyal.birger@gmail.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bd24c7dc10a2..cc284c048e69 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -842,6 +842,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From c42fa24b44751c62c86e98430ef915c0609a2ab8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Mar 2022 13:39:03 +0100 Subject: ACPI: bus: Avoid using CPPC if not supported by firmware If the platform firmware indicates that it does not support CPPC by clearing the OSC_SB_CPC_SUPPORT and OSC_SB_CPCV2_SUPPORT bits in the platform _OSC capabilities mask, avoid attempting to evaluate _CPC which may fail in that case. Because the OSC_SB_CPC_SUPPORT and OSC_SB_CPCV2_SUPPORT bits are only added to the supported platform capabilities mask on x86, when X86_FEATURE_HWP is supported, allow _CPC to be evaluated regardless in the other cases. Link: https://lore.kernel.org/linux-acpi/CAJZ5v0i=ecAksq0TV+iLVObm-=fUfdqPABzzkgm9K6KxO1ZCcg@mail.gmail.com Signed-off-by: Rafael J. Wysocki Tested-by: Mario Limonciello Acked-by: Huang Rui Reviewed-by: Mika Westerberg --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..690b0533104a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -580,6 +580,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; +extern bool osc_sb_cppc_not_supported; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 -- cgit v1.2.3 From 20e1d6402a71dba7ad2b81f332a3c14c7d3b939b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 17 Mar 2022 09:14:42 -0500 Subject: ACPI / x86: Add support for LPS0 callback handler Currenty the latest thing run during a suspend to idle attempt is the LPS0 `prepare_late` callback and the earliest thing is the `resume_early` callback. There is a desire for the `amd-pmc` driver to suspend later in the suspend process (ideally the very last thing), so create a callback that it or any other driver can hook into to do this. Signed-off-by: Mario Limonciello Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20220317141445.6498-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/acpi.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..47c16cdc8e0e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1023,7 +1023,15 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); - +#ifdef CONFIG_X86 +struct acpi_s2idle_dev_ops { + struct list_head list_node; + void (*prepare)(void); + void (*restore)(void); +}; +int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); +void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); +#endif /* CONFIG_X86 */ #ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else -- cgit v1.2.3 From 4206fe40b2c01fb5988980cff6ea958b16578026 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 27 Apr 2021 16:30:32 +0300 Subject: net/mlx5: Remove unused exported contiguous coherent buffer allocation API All WQ types moved to using the fragmented allocation API for coherent memory. Contiguous API is not used anymore. Remove it, reduce the number of exported functions. Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 00a914b0716e..a386aec1eb65 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1009,9 +1009,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, - int size, struct mlx5_frag_buf *buf); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -- cgit v1.2.3 From 770c9a3a01af178a90368a78c75eb91707c7233c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 20 May 2021 15:34:57 +0300 Subject: net/mlx5: Remove unused fill page array API function mlx5_fill_page_array API function is not used. Remove it, reduce the number of exported functions. 
Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a386aec1eb65..96cd740d94a3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1036,7 +1036,6 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); -- cgit v1.2.3 From 5142239a22219921a7863cf00c9ab853c00689d8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 11 Mar 2022 10:14:18 +0100 Subject: net: veth: Account total xdp_frame len running ndo_xdp_xmit Even if this is a theoretical issue since it is not possible to perform XDP_REDIRECT on a non-linear xdp_frame, veth driver does not account paged area in ndo_xdp_xmit function pointer. Introduce xdp_get_frame_len utility routine to get the xdp_frame full length and account total frame size running XDP_REDIRECT of a non-linear xdp frame into a veth device. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/54f9fd3bb65d190daf2c0bbae2f852ff16cfbaa0.1646989407.git.lorenzo@kernel.org --- include/net/xdp.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index b7721c3e4d1f..04c852c7a77f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -343,6 +343,20 @@ out: __xdp_release_frame(xdpf->data, mem); } +static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) +{ + struct skb_shared_info *sinfo; + unsigned int len = xdpf->len; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + len += sinfo->xdp_frags_size; +out: + return len; +} + int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index, unsigned int napi_id, u32 frag_size); -- cgit v1.2.3 From ec7328b59176227216c461601c6bd0e922232a9b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:43 +0100 Subject: net: bridge: mst: Multiple Spanning Tree (MST) mode Allow the user to switch from the current per-VLAN STP mode to an MST mode. Up to this point, per-VLAN STP states where always isolated from each other. This is in contrast to the MSTP standard (802.1Q-2018, Clause 13.5), where VLANs are grouped into MST instances (MSTIs), and the state is managed on a per-MSTI level, rather that at the per-VLAN level. Perhaps due to the prevalence of the standard, many switching ASICs are built after the same model. Therefore, add a corresponding MST mode to the bridge, which we can later add offloading support for in a straight-forward way. For now, all VLANs are fixed to MSTI 0, also called the Common Spanning Tree (CST). That is, all VLANs will follow the port-global state. Upcoming changes will make this actually useful by allowing VLANs to be mapped to arbitrary MSTIs and allow individual MSTI states to be changed. 
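Only the uapi constant is visible in this view; as a sketch, user space would toggle the new option through the existing multi-boolopt attribute (struct br_boolopt_multi nested under IFLA_BR_MULTI_BOOLOPT in the bridge's IFLA_INFO_DATA), with the usual rtnetlink plumbing omitted:

  /* payload attached under IFLA_BR_MULTI_BOOLOPT in an RTM_NEWLINK request */
  struct br_boolopt_multi bm = {
          .optval  = 1 << BR_BOOLOPT_MST_ENABLE,
          .optmask = 1 << BR_BOOLOPT_MST_ENABLE,
  };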
Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2711c3522010..30a242195ced 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -759,6 +759,7 @@ struct br_mcast_stats { enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, + BR_BOOLOPT_MST_ENABLE, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 8c678d60562f3e5f6d0a5f5465e27930ffedb8ca Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:44 +0100 Subject: net: bridge: mst: Allow changing a VLAN's MSTI Allow a VLAN to move out of the CST (MSTI 0), to an independent tree. The user manages the VID to MSTI mappings via a global VLAN setting. The proposed iproute2 interface would be: bridge vlan global set dev br0 vid msti Changing the state in non-zero MSTIs is still not supported, but will be addressed in upcoming changes. Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 30a242195ced..f60244b747ae 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -564,6 +564,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, + BRIDGE_VLANDB_GOPTS_MSTI, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 122c29486e1ff78033c45d0d31c710e7dc8945a5 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:45 +0100 Subject: net: bridge: mst: Support setting and reporting MST port states Make it possible to change the port state in a given MSTI by extending the bridge port netlink interface (RTM_SETLINK on PF_BRIDGE).The proposed iproute2 interface would be: bridge mst set dev msti state Current states in all applicable MSTIs can also be dumped via a corresponding RTM_GETLINK. 
The proposed iproute interface looks like this: $ bridge mst port msti vb1 0 state forwarding 100 state disabled vb2 0 state forwarding 100 state forwarding The preexisting per-VLAN states are still valid in the MST mode (although they are read-only), and can be queried as usual if one is interested in knowing a particular VLAN's state without having to care about the VID to MSTI mapping (in this example VLAN 20 and 30 are bound to MSTI 100): $ bridge -d vlan port vlan-id vb1 10 state forwarding mcast_router 1 20 state disabled mcast_router 1 30 state disabled mcast_router 1 40 state forwarding mcast_router 1 vb2 10 state forwarding mcast_router 1 20 state forwarding mcast_router 1 30 state forwarding mcast_router 1 40 state forwarding mcast_router 1 Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 16 ++++++++++++++++ include/uapi/linux/rtnetlink.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f60244b747ae..a86a7e7b811f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -122,6 +122,7 @@ enum { IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, IFLA_BRIDGE_CFM, + IFLA_BRIDGE_MST, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -453,6 +454,21 @@ enum { #define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) +enum { + IFLA_BRIDGE_MST_UNSPEC, + IFLA_BRIDGE_MST_ENTRY, + __IFLA_BRIDGE_MST_MAX, +}; +#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1) + +enum { + IFLA_BRIDGE_MST_ENTRY_UNSPEC, + IFLA_BRIDGE_MST_ENTRY_MSTI, + IFLA_BRIDGE_MST_ENTRY_STATE, + __IFLA_BRIDGE_MST_ENTRY_MAX, +}; +#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 51530aade46e..83849a37db5b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -817,6 +817,7 @@ enum { #define RTEXT_FILTER_MRP (1 << 4) #define RTEXT_FILTER_CFM_CONFIG (1 << 5) #define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) /* End of information exported to user level */ -- cgit v1.2.3 From 87c167bb94ee3fd49569d4aa2038b9b8840d906f Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:46 +0100 Subject: net: bridge: mst: Notify switchdev drivers of MST mode changes Trigger a switchdev event whenever the bridge's MST mode is enabled/disabled. This allows constituent ports to either perform any required hardware config, or refuse the change if it not supported. 
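A minimal sketch of how a switchdev driver might react to the new attribute, assuming the usual port_attr_set() switch with an extack available (the foo_* helpers are made up; only the attribute id and attr->u.mst come from this patch):

  case SWITCHDEV_ATTR_ID_BRIDGE_MST:
          if (attr->u.mst && !foo_hw_supports_mst(priv)) {
                  NL_SET_ERR_MSG_MOD(extack, "MST mode is not supported");
                  return -EOPNOTSUPP;
          }
          return foo_set_mst_mode(priv, attr->u.mst);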
Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/net/switchdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 3e424d40fae3..85dd004dc9ad 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -27,6 +27,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + SWITCHDEV_ATTR_ID_BRIDGE_MST, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, }; @@ -48,6 +49,7 @@ struct switchdev_attr { clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ + bool mst; /* BRIDGE_MST */ bool mc_disabled; /* MC_DISABLED */ u8 mrp_port_role; /* MRP_PORT_ROLE */ } u; -- cgit v1.2.3 From 6284c723d9b9995cc27ab3c6368a9d95d67111ff Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:47 +0100 Subject: net: bridge: mst: Notify switchdev drivers of VLAN MSTI migrations Whenever a VLAN moves to a new MSTI, send a switchdev notification so that switchdevs can track a bridge's VID to MSTI mappings. Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/net/switchdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 85dd004dc9ad..53dfa0f7cf5b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -29,6 +29,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_MST, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + SWITCHDEV_ATTR_ID_VLAN_MSTI, }; struct switchdev_brport_flags { @@ -36,6 +37,11 @@ struct switchdev_brport_flags { unsigned long mask; }; +struct switchdev_vlan_msti { + u16 vid; + u16 msti; +}; + struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; @@ -52,6 +58,7 @@ struct switchdev_attr { bool mst; /* BRIDGE_MST */ bool mc_disabled; /* MC_DISABLED */ u8 mrp_port_role; /* MRP_PORT_ROLE */ + struct switchdev_vlan_msti vlan_msti; /* VLAN_MSTI */ } u; }; -- cgit v1.2.3 From 7ae9147f4312903b97eae231c48571bbd95dc63f Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:48 +0100 Subject: net: bridge: mst: Notify switchdev drivers of MST state changes Generate a switchdev notification whenever an MST state changes. This notification is keyed by the VLANs MSTI rather than the VID, since multiple VLANs may share the same MST instance. 
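A corresponding fragment for the per-MSTI state attribute, again from a hypothetical driver's port_attr_set() switch (foo_* is made up; the state field carries the usual BR_STATE_* port states):

  case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
          return foo_port_set_mst_state(priv, port,
                                        attr->u.mst_state.msti,
                                        attr->u.mst_state.state);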
Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/net/switchdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 53dfa0f7cf5b..aa0171d5786d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,6 +19,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, + SWITCHDEV_ATTR_ID_PORT_MST_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, @@ -32,6 +33,11 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_VLAN_MSTI, }; +struct switchdev_mst_state { + u16 msti; + u8 state; +}; + struct switchdev_brport_flags { unsigned long val; unsigned long mask; @@ -50,6 +56,7 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ + struct switchdev_mst_state mst_state; /* PORT_MST_STATE */ struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ -- cgit v1.2.3 From cceac97afa090284b3ceecd93ea6b7b527116767 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:49 +0100 Subject: net: bridge: mst: Add helper to map an MSTI to a VID set br_mst_get_info answers the question: "On this bridge, which VIDs are mapped to the given MSTI?" This is useful in switchdev drivers, which might have to fan-out operations, relating to an MSTI, per VLAN. An example: When a port's MST state changes from forwarding to blocking, a driver may choose to flush the dynamic FDB entries on that port to get faster reconvergence of the network, but this should only be done in the VLANs that are managed by the MSTI in question. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3aae023a9353..1cf0cc46d90d 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -119,6 +119,7 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -151,6 +152,12 @@ static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, { return -EINVAL; } + +static inline int br_mst_get_info(const struct net_device *dev, u16 msti, + unsigned long *vids) +{ + return -EINVAL; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) -- cgit v1.2.3 From 48d57b2e5f439246c4e12ed7705a5e3241294b03 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:50 +0100 Subject: net: bridge: mst: Add helper to check if MST is enabled This is useful for switchdev drivers that might want to refuse to join a bridge where MST is enabled, if the hardware can't support it. 
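Putting the two helpers together, a hedged sketch of an offload path that refuses MST-enabled bridges it cannot handle and otherwise fans a blocking transition out per VLAN (foo_* helpers are made up; the VLAN_N_VID-sized bitmap matches how the vids argument is expected to be used):

  DECLARE_BITMAP(vids, VLAN_N_VID) = { 0 };
  unsigned int vid;
  int err;

  if (br_mst_enabled(br_dev) && !foo_hw_supports_mst(priv))
          return -EOPNOTSUPP;

  err = br_mst_get_info(br_dev, msti, vids);
  if (err)
          return err;

  for_each_set_bit(vid, vids, VLAN_N_VID)
          foo_flush_fdb_in_vlan(priv, port, vid);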
Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 1cf0cc46d90d..4efd5540279a 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -119,6 +119,7 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +bool br_mst_enabled(const struct net_device *dev); int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids); #else static inline bool br_vlan_enabled(const struct net_device *dev) @@ -153,6 +154,11 @@ static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, return -EINVAL; } +static inline bool br_mst_enabled(const struct net_device *dev) +{ + return false; +} + static inline int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { -- cgit v1.2.3 From f54fd0e163068ced4d0d27db64cd3cbda95b74e4 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:51 +0100 Subject: net: bridge: mst: Add helper to query a port's MST state This is useful for switchdev drivers who are offloading MST states into hardware. As an example, a driver may wish to flush the FDB for a port when it transitions from forwarding to blocking - which means that the previous state must be discoverable. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 4efd5540279a..d62ef428e3aa 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -121,6 +121,7 @@ int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); bool br_mst_enabled(const struct net_device *dev); int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids); +int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -164,6 +165,11 @@ static inline int br_mst_get_info(const struct net_device *dev, u16 msti, { return -EINVAL; } +static inline int br_mst_get_state(const struct net_device *dev, u16 msti, + u8 *state) +{ + return -EINVAL; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) -- cgit v1.2.3 From 8e6598a7b0fa834a563392d30017eac1b0102fcd Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:53 +0100 Subject: net: dsa: Pass VLAN MSTI migration notifications to driver Add the usual trampoline functionality from the generic DSA layer down to the drivers for VLAN MSTI migrations. 
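A minimal sketch of a driver hooking the new op (driver names and the mapping helper are assumptions; the op signature matches the dsa_switch_ops addition below):

  static int foo_vlan_msti_set(struct dsa_switch *ds, struct dsa_bridge bridge,
                               const struct switchdev_vlan_msti *msti)
  {
          struct foo_priv *priv = ds->priv;

          /* associate msti->vid with spanning tree group msti->msti */
          return foo_map_vid_to_stg(priv, msti->vid, msti->msti);
  }

  static const struct dsa_switch_ops foo_switch_ops = {
          /* other ops elided */
          .vlan_msti_set = foo_vlan_msti_set,
  };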
Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9bfe984fcdbf..644fda2293a2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -976,6 +976,9 @@ struct dsa_switch_ops { struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); + int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge, + const struct switchdev_vlan_msti *msti); + /* * Forwarding database */ -- cgit v1.2.3 From 7414af30b7d80f117bed5ad6769e6ffb433573ba Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:54 +0100 Subject: net: dsa: Handle MST state changes Add the usual trampoline functionality from the generic DSA layer down to the drivers for MST state changes. When a state changes to disabled/blocking/listening, make sure to fast age any dynamic entries in the affected VLANs (those controlled by the MSTI in question). Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 644fda2293a2..06cdefd3b9dd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -957,7 +957,10 @@ struct dsa_switch_ops { struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + int (*port_mst_state_set)(struct dsa_switch *ds, int port, + const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -- cgit v1.2.3 From ccb6ed426f10ac4f742efa7d897c266aa10ac64a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Mar 2022 22:41:40 +0200 Subject: net: mscc: ocelot: add port mirroring support using tc-matchall Ocelot switches perform port-based ingress mirroring if ANA:PORT:PORT_CFG field SRC_MIRROR_ENA is set, and egress mirroring if the port is in ANA:ANA:EMIRRORPORTS. Both ingress-mirrored and egress-mirrored frames are copied to the port mask from ANA:ANA:MIRRORPORTS. So the choice of limiting to a single mirror port via ocelot_mirror_get() and ocelot_mirror_put() may seem bizarre, but the hardware model doesn't map very well to the user space model. If the user wants to mirror the ingress of swp1 towards swp2 and the ingress of swp3 towards swp4, we'd have to program ANA:ANA:MIRRORPORTS with BIT(2) | BIT(4), and that would make swp1 be mirrored towards swp4 too, and swp3 towards swp2. But there are no tc-matchall rules to describe those actions. Now, we could offload a matchall rule with multiple mirred actions, one per desired mirror port, and force the user to stick to the multi-action rule format for subsequent matchall filters. But both DSA and ocelot have the flow_offload_has_one_action() check for the matchall offload, plus the fact that it will get cumbersome to cross-check matchall mirrors with flower mirrors (which will be added in the next patch). As a result, we limit the configuration to a single mirror port, with the possibility of lifting the restriction in the future. 
Frames injected from the CPU don't get egress-mirrored, since they are sent with the BYPASS bit in the injection frame header, and this bypasses the analyzer module (effectively also the mirroring logic). I don't know what to do/say about this. Functionality was tested with: tc qdisc add dev swp3 clsact tc filter add dev swp3 ingress \ matchall skip_sw \ action mirred egress mirror dev swp1 and pinging through swp3, while seeing that the ICMP replies are mirrored towards swp1. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4d51e2a7120f..9b4e6c78d0f4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -642,6 +642,11 @@ struct ocelot_lag_fdb { struct list_head list; }; +struct ocelot_mirror { + refcount_t refcount; + int to; +}; + struct ocelot_port { struct ocelot *ocelot; @@ -723,6 +728,7 @@ struct ocelot { struct ocelot_vcap_block block[3]; struct ocelot_vcap_policer vcap_pol; struct vcap_props *vcap; + struct ocelot_mirror *mirror; struct ocelot_psfp_list psfp; @@ -908,6 +914,9 @@ int ocelot_get_max_mtu(struct ocelot *ocelot, int port); int ocelot_port_policer_add(struct ocelot *ocelot, int port, struct ocelot_policer *pol); int ocelot_port_policer_del(struct ocelot *ocelot, int port); +int ocelot_port_mirror_add(struct ocelot *ocelot, int from, int to, + bool ingress, struct netlink_ext_ack *extack); +void ocelot_port_mirror_del(struct ocelot *ocelot, int from, bool ingress); int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, -- cgit v1.2.3 From f2a0e216bee5d95e2c2d916a8815a659cd3703c2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Mar 2022 22:41:42 +0200 Subject: net: mscc: ocelot: offload per-flow mirroring using tc-mirred and VCAP IS2 Per-flow mirroring with the VCAP IS2 TCAM (in itself handled as an offload for tc-flower) is done by setting the MIRROR_ENA bit from the action vector of the filter. The packet is mirrored to the port mask configured in the ANA:ANA:MIRRORPORTS register (the same port mask as the destinations for port-based mirroring). Functionality was tested with: tc qdisc add dev swp3 clsact tc filter add dev swp3 ingress protocol ip \ flower skip_sw ip_proto icmp \ action mirred egress mirror dev swp1 and pinging through swp3, while seeing that the ICMP replies are mirrored towards swp1. 
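Both the matchall and the flower paths end up sharing the single mirror target tracked by struct ocelot_mirror. A minimal sketch of the get/put refcounting described above follows; it is illustrative only (names taken from the commit message, locking and the ANA:ANA:MIRRORPORTS register write omitted), not the exact driver code:

static struct ocelot_mirror *ocelot_mirror_get(struct ocelot *ocelot, int to,
					       struct netlink_ext_ack *extack)
{
	struct ocelot_mirror *m = ocelot->mirror;

	if (m) {
		if (m->to != to) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Mirroring already configured towards different egress port");
			return ERR_PTR(-EBUSY);
		}
		refcount_inc(&m->refcount);
		return m;
	}

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (!m)
		return ERR_PTR(-ENOMEM);

	m->to = to;
	refcount_set(&m->refcount, 1);
	ocelot->mirror = m;
	/* program ANA:ANA:MIRRORPORTS with BIT(to) here */
	return m;
}

static void ocelot_mirror_put(struct ocelot *ocelot)
{
	struct ocelot_mirror *m = ocelot->mirror;

	if (!refcount_dec_and_test(&m->refcount))
		return;

	/* clear ANA:ANA:MIRRORPORTS here */
	ocelot->mirror = NULL;
	kfree(m);
}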
Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot_vcap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index deb2ad9eb0a5..7b2bf9b1fe69 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -654,6 +654,7 @@ struct ocelot_vcap_action { enum ocelot_mask_mode mask_mode; unsigned long port_mask; bool police_ena; + bool mirror_ena; struct ocelot_policer pol; u32 pol_ix; }; @@ -697,6 +698,7 @@ struct ocelot_vcap_filter { unsigned long ingress_port_mask; /* For VCAP ES0 */ struct ocelot_vcap_port ingress_port; + /* For VCAP IS2 mirrors and ES0 */ struct ocelot_vcap_port egress_port; enum ocelot_vcap_bit dmac_mc; -- cgit v1.2.3 From 0148bb50b8fd51baf357de8b237c0c6011506540 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Mar 2022 22:41:43 +0200 Subject: net: dsa: pass extack to dsa_switch_ops :: port_mirror_add() Drivers might have error messages to propagate to user space, most common being that they support a single mirror port. Propagate the netlink extack so that they can inform user space in a verbal way of their limitations. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 06cdefd3b9dd..934958fda962 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1028,7 +1028,7 @@ struct dsa_switch_ops { struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress); + bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, -- cgit v1.2.3 From 4f554e955614f19425cee86de4669351741a6280 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Mar 2022 23:00:26 +0900 Subject: ftrace: Add ftrace_set_filter_ips function Adding ftrace_set_filter_ips function to be able to set filter on multiple ip addresses at once. With the kprobe multi attach interface we have cases where we need to initialize ftrace_ops object with thousands of functions, so having single function diving into ftrace_hash_move_and_update_ops with ftrace_lock is faster. The functions ips are passed as unsigned long array with count. 
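A usage sketch, under assumptions (my_ops is a hypothetical ftrace_ops already set up with a callback, and addrs[] holds resolved function addresses): the whole batch is filtered in one call instead of looping over ftrace_set_filter_ip().

	unsigned long addrs[] = { addr_of_func1, addr_of_func2, addr_of_func3 };
	int ret;

	ret = ftrace_set_filter_ips(&my_ops, addrs, ARRAY_SIZE(addrs),
				    0 /* remove */, 1 /* reset */);
	if (!ret)
		ret = register_ftrace_function(&my_ops);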
Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164735282673.1084943.18310504594134769804.stgit@devnote2 --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9999e29187de..60847cbce0da 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -512,6 +512,8 @@ struct dyn_ftrace { int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); +int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, + unsigned int cnt, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -802,6 +804,7 @@ static inline unsigned long ftrace_location(unsigned long ip) #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) #define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) +#define ftrace_set_filter_ips(ops, ips, cnt, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) -- cgit v1.2.3 From cad9931f64dc7f5dbdec12cae9f30063360f9855 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 15 Mar 2022 23:00:38 +0900 Subject: fprobe: Add ftrace based probe APIs The fprobe is a wrapper API for ftrace function tracer. Unlike kprobes, this probes only supports the function entry, but this can probe multiple functions by one fprobe. The usage is similar, user will set their callback to fprobe::entry_handler and call register_fprobe*() with probed functions. There are 3 registration interfaces, - register_fprobe() takes filtering patterns of the functin names. - register_fprobe_ips() takes an array of ftrace-location addresses. - register_fprobe_syms() takes an array of function names. The registered fprobes can be unregistered with unregister_fprobe(). e.g. struct fprobe fp = { .entry_handler = user_handler }; const char *targets[] = { "func1", "func2", "func3"}; ... ret = register_fprobe_syms(&fp, targets, ARRAY_SIZE(targets)); ... unregister_fprobe(&fp); Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164735283857.1084943.1154436951479395551.stgit@devnote2 --- include/linux/fprobe.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 include/linux/fprobe.h (limited to 'include') diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h new file mode 100644 index 000000000000..2ba099aff041 --- /dev/null +++ b/include/linux/fprobe.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Simple ftrace probe wrapper */ +#ifndef _LINUX_FPROBE_H +#define _LINUX_FPROBE_H + +#include +#include + +/** + * struct fprobe - ftrace based probe. + * @ops: The ftrace_ops. + * @nmissed: The counter for missing events. + * @flags: The status flag. + * @entry_handler: The callback function for function entry. + */ +struct fprobe { +#ifdef CONFIG_FUNCTION_TRACER + /* + * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. 
+ * But user of fprobe may keep embedding the struct fprobe on their own + * code. To avoid build error, this will keep the fprobe data structure + * defined here, but remove ftrace_ops data structure. + */ + struct ftrace_ops ops; +#endif + unsigned long nmissed; + unsigned int flags; + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); +}; + +#define FPROBE_FL_DISABLED 1 + +static inline bool fprobe_disabled(struct fprobe *fp) +{ + return (fp) ? fp->flags & FPROBE_FL_DISABLED : false; +} + +#ifdef CONFIG_FPROBE +int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter); +int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); +int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); +int unregister_fprobe(struct fprobe *fp); +#else +static inline int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) +{ + return -EOPNOTSUPP; +} +static inline int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num) +{ + return -EOPNOTSUPP; +} +static inline int register_fprobe_syms(struct fprobe *fp, const char **syms, int num) +{ + return -EOPNOTSUPP; +} +static inline int unregister_fprobe(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} +#endif + +/** + * disable_fprobe() - Disable fprobe + * @fp: The fprobe to be disabled. + * + * This will soft-disable @fp. Note that this doesn't remove the ftrace + * hooks from the function entry. + */ +static inline void disable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags |= FPROBE_FL_DISABLED; +} + +/** + * enable_fprobe() - Enable fprobe + * @fp: The fprobe to be enabled. + * + * This will soft-enable @fp. + */ +static inline void enable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags &= ~FPROBE_FL_DISABLED; +} + +#endif -- cgit v1.2.3 From 54ecbe6f1ed5138c895bdff55608cf502755b20e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 15 Mar 2022 23:00:50 +0900 Subject: rethook: Add a generic return hook Add a return hook framework which hooks the function return. Most of the logic came from the kretprobe, but this is independent from kretprobe. Note that this is expected to be used with other function entry hooking feature, like ftrace, fprobe, adn kprobes. Eventually this will replace the kretprobe (e.g. kprobe + rethook = kretprobe), but at this moment, this is just an additional hook. Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164735285066.1084943.9259661137330166643.stgit@devnote2 --- include/linux/rethook.h | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 ++ 2 files changed, 103 insertions(+) create mode 100644 include/linux/rethook.h (limited to 'include') diff --git a/include/linux/rethook.h b/include/linux/rethook.h new file mode 100644 index 000000000000..c8ac1e5afcd1 --- /dev/null +++ b/include/linux/rethook.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Return hooking with list-based shadow stack. + */ +#ifndef _LINUX_RETHOOK_H +#define _LINUX_RETHOOK_H + +#include +#include +#include +#include +#include +#include + +struct rethook_node; + +typedef void (*rethook_handler_t) (struct rethook_node *, void *, struct pt_regs *); + +/** + * struct rethook - The rethook management data structure. + * @data: The user-defined data storage. + * @handler: The user-defined return hook handler. 
+ * @pool: The pool of struct rethook_node. + * @ref: The reference counter. + * @rcu: The rcu_head for deferred freeing. + * + * Don't embed to another data structure, because this is a self-destructive + * data structure when all rethook_node are freed. + */ +struct rethook { + void *data; + rethook_handler_t handler; + struct freelist_head pool; + refcount_t ref; + struct rcu_head rcu; +}; + +/** + * struct rethook_node - The rethook shadow-stack entry node. + * @freelist: The freelist, linked to struct rethook::pool. + * @rcu: The rcu_head for deferred freeing. + * @llist: The llist, linked to a struct task_struct::rethooks. + * @rethook: The pointer to the struct rethook. + * @ret_addr: The storage for the real return address. + * @frame: The storage for the frame pointer. + * + * You can embed this to your extended data structure to store any data + * on each entry of the shadow stack. + */ +struct rethook_node { + union { + struct freelist_node freelist; + struct rcu_head rcu; + }; + struct llist_node llist; + struct rethook *rethook; + unsigned long ret_addr; + unsigned long frame; +}; + +struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +void rethook_free(struct rethook *rh); +void rethook_add_node(struct rethook *rh, struct rethook_node *node); +struct rethook_node *rethook_try_get(struct rethook *rh); +void rethook_recycle(struct rethook_node *node); +void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); +unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame, + struct llist_node **cur); + +/* Arch dependent code must implement arch_* and trampoline code */ +void arch_rethook_prepare(struct rethook_node *node, struct pt_regs *regs, bool mcount); +void arch_rethook_trampoline(void); + +/** + * is_rethook_trampoline() - Check whether the address is rethook trampoline + * @addr: The address to be checked + * + * Return true if the @addr is the rethook trampoline address. + */ +static inline bool is_rethook_trampoline(unsigned long addr) +{ + return addr == (unsigned long)dereference_symbol_descriptor(arch_rethook_trampoline); +} + +/* If the architecture needs to fixup the return address, implement it. */ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr); + +/* Generic trampoline handler, arch code must prepare asm stub */ +unsigned long rethook_trampoline_handler(struct pt_regs *regs, + unsigned long frame); + +#ifdef CONFIG_RETHOOK +void rethook_flush_task(struct task_struct *tk); +#else +#define rethook_flush_task(tsk) do { } while (0) +#endif + +#endif + diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..7034f53404e3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1481,6 +1481,9 @@ struct task_struct { #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif +#ifdef CONFIG_RETHOOK + struct llist_head rethooks; +#endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* -- cgit v1.2.3 From 5b0ab78998e32564a011b14c4c7f9c81e2d42b9d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 15 Mar 2022 23:01:48 +0900 Subject: fprobe: Add exit_handler support Add exit_handler to fprobe. fprobe + rethook allows us to hook the kernel function return. The rethook will be enabled only if the fprobe::exit_handler is set. 
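An illustrative sketch combining both callbacks (handler bodies and the "func*" pattern are made up for the example; the prototypes follow the fprobe header above):

static void my_entry(struct fprobe *fp, unsigned long entry_ip,
		     struct pt_regs *regs)
{
	pr_info("entered %pS\n", (void *)entry_ip);
}

static void my_exit(struct fprobe *fp, unsigned long entry_ip,
		    struct pt_regs *regs)
{
	pr_info("returning from %pS\n", (void *)entry_ip);
}

static struct fprobe fp = {
	.entry_handler	= my_entry,
	.exit_handler	= my_exit,	/* setting this enables the rethook */
};

	/* ... */
	ret = register_fprobe(&fp, "func*", NULL);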
Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164735290790.1084943.10601965782208052202.stgit@devnote2 --- include/linux/fprobe.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 2ba099aff041..8eefec2b485e 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -5,13 +5,16 @@ #include #include +#include /** * struct fprobe - ftrace based probe. * @ops: The ftrace_ops. * @nmissed: The counter for missing events. * @flags: The status flag. + * @rethook: The rethook data structure. (internal data) * @entry_handler: The callback function for function entry. + * @exit_handler: The callback function for function exit. */ struct fprobe { #ifdef CONFIG_FUNCTION_TRACER @@ -25,7 +28,10 @@ struct fprobe { #endif unsigned long nmissed; unsigned int flags; + struct rethook *rethook; + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); + void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); }; #define FPROBE_FL_DISABLED 1 -- cgit v1.2.3 From ab51e15d535e07be9839e0df056a4ebe9c5bac83 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 15 Mar 2022 23:02:11 +0900 Subject: fprobe: Introduce FPROBE_FL_KPROBE_SHARED flag for fprobe Introduce FPROBE_FL_KPROBE_SHARED flag for sharing fprobe callback with kprobes safely from the viewpoint of recursion. Since the recursion safety of the fprobe (and ftrace) is a bit different from the kprobes, this may cause an issue if user wants to run the same code from the fprobe and the kprobes. The kprobes has per-cpu 'current_kprobe' variable which protects the kprobe handler from recursion in any case. On the other hand, the fprobe uses only ftrace_test_recursion_trylock(), which will allow interrupt context calls another (or same) fprobe during the fprobe user handler is running. This is not a matter in cases if the common callback shared among the kprobes and the fprobe has its own recursion detection, or it can handle the recursion in the different contexts (normal/interrupt/NMI.) But if it relies on the 'current_kprobe' recursion lock, it has to check kprobe_running() and use kprobe_busy_*() APIs. Fprobe has FPROBE_FL_KPROBE_SHARED flag to do this. If your common callback code will be shared with kprobes, please set FPROBE_FL_KPROBE_SHARED *before* registering the fprobe, like; fprobe.flags = FPROBE_FL_KPROBE_SHARED; register_fprobe(&fprobe, "func*", NULL); This will protect your common callback from the nested call. Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164735293127.1084943.15687374237275817599.stgit@devnote2 --- include/linux/fprobe.h | 12 ++++++++++++ include/linux/kprobes.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 8eefec2b485e..1c2bde0ead73 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -34,13 +34,25 @@ struct fprobe { void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); }; +/* This fprobe is soft-disabled. */ #define FPROBE_FL_DISABLED 1 +/* + * This fprobe handler will be shared with kprobes. + * This flag must be set before registering. 
+ */ +#define FPROBE_FL_KPROBE_SHARED 2 + static inline bool fprobe_disabled(struct fprobe *fp) { return (fp) ? fp->flags & FPROBE_FL_DISABLED : false; } +static inline bool fprobe_shared_with_kprobes(struct fprobe *fp) +{ + return (fp) ? fp->flags & FPROBE_FL_KPROBE_SHARED : false; +} + #ifdef CONFIG_FPROBE int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter); int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 19b884353b15..5f1859836deb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -427,6 +427,9 @@ static inline struct kprobe *kprobe_running(void) { return NULL; } +#define kprobe_busy_begin() do {} while (0) +#define kprobe_busy_end() do {} while (0) + static inline int register_kprobe(struct kprobe *p) { return -EOPNOTSUPP; -- cgit v1.2.3 From a0019cd7d41a191253859349535d76ee28ab7d96 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:07 +0100 Subject: lib/sort: Add priv pointer to swap function Adding support to have priv pointer in swap callback function. Following the initial change on cmp callback functions [1] and adding SWAP_WRAPPER macro to identify sort call of sort_r. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/20220316122419.933957-2-jolsa@kernel.org [1] 4333fb96ca10 ("media: lib/sort.c: implement sort() variant taking context argument") --- include/linux/sort.h | 2 +- include/linux/types.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sort.h b/include/linux/sort.h index b5898725fe9d..e163287ac6c1 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -6,7 +6,7 @@ void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, - swap_func_t swap_func, + swap_r_func_t swap_func, const void *priv); void sort(void *base, size_t num, size_t size, diff --git a/include/linux/types.h b/include/linux/types.h index ac825ad90e44..ea8cf60a8a79 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -226,6 +226,7 @@ struct callback_head { typedef void (*rcu_callback_t)(struct rcu_head *head); typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); +typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv); typedef void (*swap_func_t)(void *a, void *b, int size); typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); -- cgit v1.2.3 From 0dcac272540613d41c05e89679e4ddb978b612f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:09 +0100 Subject: bpf: Add multi kprobe link Adding new link type BPF_LINK_TYPE_KPROBE_MULTI that attaches kprobe program through fprobe API. The fprobe API allows to attach probe on multiple functions at once very fast, because it works on top of ftrace. On the other hand this limits the probe point to the function entry or return. The kprobe program gets the same pt_regs input ctx as when it's attached through the perf API. Adding new attach type BPF_TRACE_KPROBE_MULTI that allows attachment kprobe to multiple function with new link. User provides array of addresses or symbols with count to attach the kprobe program to. The new link_create uapi interface looks like: struct { __u32 flags; __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; } kprobe_multi; The flags field allows single BPF_TRACE_KPROBE_MULTI bit to create return multi kprobe. 
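A userspace sketch of the new attach path, using the field names from the uapi hunk below; prog_fd is assumed to be an already loaded kprobe program and error handling is omitted. A return probe would additionally set BPF_F_KPROBE_MULTI_RETURN in flags.

	const char *syms[] = { "func1", "func2", "func3" };
	union bpf_attr attr = {};
	int link_fd;

	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_KPROBE_MULTI;
	attr.link_create.kprobe_multi.cnt = 3;
	attr.link_create.kprobe_multi.syms = (__u64)(unsigned long)syms;

	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));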
Signed-off-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220316122419.933957-4-jolsa@kernel.org --- include/linux/bpf_types.h | 1 + include/linux/trace_events.h | 7 +++++++ include/uapi/linux/bpf.h | 13 +++++++++++++ 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 48a91c51c015..3e24ad0c4b3c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -140,3 +140,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif +BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index dcea51fb60e2..8f0e9e7cb493 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,6 +15,7 @@ struct array_buffer; struct tracer; struct dentry; struct bpf_prog; +union bpf_attr; const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, @@ -738,6 +739,7 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr); +int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -779,6 +781,11 @@ static inline int bpf_get_perf_event_info(const struct perf_event *event, { return -EOPNOTSUPP; } +static inline int +bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif enum { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 99fab54ae9c0..d77f47af7752 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -997,6 +997,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1011,6 +1012,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, MAX_BPF_LINK_TYPE, }; @@ -1118,6 +1120,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1475,6 +1482,12 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + } kprobe_multi; }; } link_create; -- cgit v1.2.3 From ca74823c6e16dd42b7cf60d9fdde80e2a81a67bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:12 +0100 Subject: bpf: Add cookie support to programs attached with kprobe multi link Adding support to call bpf_get_attach_cookie helper from kprobe programs attached with kprobe multi link. The cookie is provided by array of u64 values, where each value is paired with provided function address or symbol with the same array index. When cookie array is provided it's sorted together with addresses (check bpf_kprobe_multi_cookie_swap). This way we can find cookie based on the address in bpf_get_attach_cookie helper. 
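On the BPF program side, reading the per-symbol cookie back might look like the sketch below; the section name assumes the kprobe.multi convention used by libbpf for this attach type, and the program body is hypothetical.

SEC("kprobe.multi/func*")
int trace_funcs(struct pt_regs *ctx)
{
	__u64 cookie = bpf_get_attach_cookie(ctx);

	bpf_printk("cookie %llu", cookie);
	return 0;
}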
Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220316122419.933957-7-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d77f47af7752..7604e7d5438f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1487,6 +1487,7 @@ union bpf_attr { __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; + __aligned_u64 cookies; } kprobe_multi; }; } link_create; -- cgit v1.2.3 From b58b1f563ab78955d37e9e43e02790a85c66ac05 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 14 Mar 2022 11:38:22 +0100 Subject: xfrm: rework default policy structure This is a follow up of commit f8d858e607b2 ("xfrm: make user policy API complete"). The goal is to align userland API to the internal structures. Signed-off-by: Nicolas Dichtel Reviewed-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 6 +----- include/net/xfrm.h | 48 ++++++++++++++++++------------------------------ 2 files changed, 19 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 947733a639a6..bd7c3be4af5d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -66,11 +66,7 @@ struct netns_xfrm { int sysctl_larval_drop; u32 sysctl_acq_expires; - u8 policy_default; -#define XFRM_POL_DEFAULT_IN 1 -#define XFRM_POL_DEFAULT_OUT 2 -#define XFRM_POL_DEFAULT_FWD 4 -#define XFRM_POL_DEFAULT_MASK 7 + u8 policy_default[XFRM_POLICY_MAX]; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fdb41e8bb626..1541ca0cb13c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1081,25 +1081,18 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM -static inline bool -xfrm_default_allow(struct net *net, int dir) -{ - u8 def = net->xfrm.policy_default; - - switch (dir) { - case XFRM_POLICY_IN: - return def & XFRM_POL_DEFAULT_IN ? false : true; - case XFRM_POLICY_OUT: - return def & XFRM_POL_DEFAULT_OUT ? false : true; - case XFRM_POLICY_FWD: - return def & XFRM_POL_DEFAULT_FWD ? 
false : true; - } - return false; -} - int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family); +static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, + int dir) +{ + if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) + return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; + + return false; +} + static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) @@ -1110,13 +1103,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - if (xfrm_default_allow(net, dir)) - return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); - else - return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); + return __xfrm_check_nopolicy(net, skb, dir) || + (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) @@ -1168,13 +1157,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_OUT)) - return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); - else - return (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); + if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && + net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) + return true; + + return (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) -- cgit v1.2.3 From 7a19006b60b129ce2cbe787f4f910dc0ec5a1ec6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Mar 2022 08:34:52 +0100 Subject: kernfs: remove unneeded #if 0 guard Commit f2eb478f2f32 ("kernfs: move struct kernfs_root out of the public view.") moved kernfs_root out of kernfs.h, but my debugging code of a #if 0 was left in accidentally. Fix that up by removing the guards. 
Fixes: f2eb478f2f32 ("kernfs: move struct kernfs_root out of the public view.") Cc: Tejun Heo Reported-by: Al Viro Link: https://lore.kernel.org/r/20220318073452.1486568-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 62aff082dc3f..e2ae15a6225e 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -185,26 +185,6 @@ struct kernfs_syscall_ops { struct kernfs_root *root); }; -#if 0 -struct kernfs_root { - /* published fields */ - struct kernfs_node *kn; - unsigned int flags; /* KERNFS_ROOT_* flags */ - - /* private fields, do not use outside kernfs proper */ - struct idr ino_idr; - u32 last_id_lowbits; - u32 id_highbits; - struct kernfs_syscall_ops *syscall_ops; - - /* list of kernfs_super_info of this root, protected by kernfs_rwsem */ - struct list_head supers; - - wait_queue_head_t deactivate_waitq; - struct rw_semaphore kernfs_rwsem; -}; -#endif - struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root); struct kernfs_open_file { -- cgit v1.2.3 From e9b57aaae605eb869a628fdc21fe7d2c77a2205d Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 9 Feb 2022 14:00:47 +0800 Subject: fscache: export fscache_end_operation() Export fscache_end_operation() to avoid code duplication. Besides, considering the paired fscache_begin_read_operation() is already exported, it shall make sense to also export fscache_end_operation(). Signed-off-by: Jeffle Xu Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/20220302125134.131039-2-jefflexu@linux.alibaba.com/ # Jeffle's v4 Link: https://lore.kernel.org/r/164622971432.3564931.12184135678781328146.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678190346.1200972.7453733431978569479.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692888334.2099075.5166283293894267365.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/20220316131723.111553-2-jefflexu@linux.alibaba.com/ # v5 --- include/linux/fscache.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 296c5f1d9f35..d2430da8aa67 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -456,6 +456,20 @@ int fscache_begin_read_operation(struct netfs_cache_resources *cres, return -ENOBUFS; } +/** + * fscache_end_operation - End the read operation for the netfs lib + * @cres: The cache resources for the read operation + * + * Clean up the resources at the end of the read request. + */ +static inline void fscache_end_operation(struct netfs_cache_resources *cres) +{ + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); + + if (ops) + ops->end_operation(cres); +} + /** * fscache_read - Start a read from the cache. * @cres: The cache resources to use -- cgit v1.2.3 From 5ac417d24c6cc2fa9ac69d8b9f4510a38ec40486 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Feb 2022 15:12:44 +0000 Subject: netfs: Generate enums from trace symbol mapping lists netfs has a number of lists of symbols for use in tracing, listed in an enum and then listed again in a symbol->string mapping for use with __print_symbolic(). This is, however, redundant. Instead, use the symbol->string mapping list to also generate the enum where the enum is in the same file. 
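The trick in miniature (an illustrative sketch, not the netfs code itself): a single EM()/E_() list is expanded twice, once to build the enum and once to build the __print_symbolic() mapping.

#define example_traces \
	EM(example_trace_foo, "FOO") \
	E_(example_trace_bar, "BAR")

#undef EM
#undef E_
#define EM(a, b) a,
#define E_(a, b) a
enum example_trace { example_traces };

#undef EM
#undef E_
#define EM(a, b) { a, b },
#define E_(a, b) { a, b }
/* later: __print_symbolic(what, example_traces) */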
Changes ======= ver #3) - #undef EM and E_ at the end of the trace file[1]. Signed-off-by: David Howells Acked-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/c2f4b3dc107b106e04c48f54945a12715cccfdf3.camel@redhat.com/ [1] Link: https://lore.kernel.org/r/164622980839.3564931.5673300162465266909.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678192454.1200972.4428834328108580460.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/CALF+zOkB38_MB5QwNUtqTU4WjMaLUJ5+Piwsn3pMxkO3d4J7Kg@mail.gmail.com/ # v2 Link: https://lore.kernel.org/r/164692890614.2099075.12960653141802151575.stgit@warthog.procyon.org.uk/ # v3 --- include/trace/events/netfs.h | 59 ++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index e6f4ebbb4c69..4d0bf02d490a 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -15,49 +15,6 @@ /* * Define enums for tracing information. */ -#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY -#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY - -enum netfs_read_trace { - netfs_read_trace_expanded, - netfs_read_trace_readahead, - netfs_read_trace_readpage, - netfs_read_trace_write_begin, -}; - -enum netfs_rreq_trace { - netfs_rreq_trace_assess, - netfs_rreq_trace_done, - netfs_rreq_trace_free, - netfs_rreq_trace_resubmit, - netfs_rreq_trace_unlock, - netfs_rreq_trace_unmark, - netfs_rreq_trace_write, -}; - -enum netfs_sreq_trace { - netfs_sreq_trace_download_instead, - netfs_sreq_trace_free, - netfs_sreq_trace_prepare, - netfs_sreq_trace_resubmit_short, - netfs_sreq_trace_submit, - netfs_sreq_trace_terminated, - netfs_sreq_trace_write, - netfs_sreq_trace_write_skip, - netfs_sreq_trace_write_term, -}; - -enum netfs_failure { - netfs_fail_check_write_begin, - netfs_fail_copy_to_cache, - netfs_fail_read, - netfs_fail_short_readpage, - netfs_fail_short_write_begin, - netfs_fail_prepare_write, -}; - -#endif - #define netfs_read_traces \ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ @@ -98,6 +55,20 @@ enum netfs_failure { EM(netfs_fail_short_write_begin, "short-write-begin") \ E_(netfs_fail_prepare_write, "prep-write") +#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#undef EM +#undef E_ +#define EM(a, b) a, +#define E_(a, b) a + +enum netfs_read_trace { netfs_read_traces } __mode(byte); +enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte); +enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte); +enum netfs_failure { netfs_failures } __mode(byte); + +#endif /* * Export enum symbols via userspace. @@ -258,6 +229,8 @@ TRACE_EVENT(netfs_failure, __entry->error) ); +#undef EM +#undef E_ #endif /* _TRACE_NETFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6a19114b8e7f1e24d80b0812e26d78d7ae1ec6dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Feb 2022 10:01:23 +0000 Subject: netfs: Rename netfs_read_*request to netfs_io_*request Rename netfs_read_*request to netfs_io_*request so that the same structures can be used for the write helpers too. 
perl -p -i -e 's/netfs_read_(request|subrequest)/netfs_io_$1/g' \ `git grep -l 'netfs_read_\(sub\|\)request'` perl -p -i -e 's/nr_rd_ops/nr_outstanding/g' \ `git grep -l nr_rd_ops` perl -p -i -e 's/nr_wr_ops/nr_copy_ops/g' \ `git grep -l nr_wr_ops` perl -p -i -e 's/netfs_read_source/netfs_io_source/g' \ `git grep -l 'netfs_read_source'` perl -p -i -e 's/netfs_io_request_ops/netfs_request_ops/g' \ `git grep -l 'netfs_io_request_ops'` perl -p -i -e 's/init_rreq/init_request/g' \ `git grep -l 'init_rreq'` Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622988070.3564931.7089670190434315183.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678195157.1200972.366609966927368090.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692891535.2099075.18435198075367420588.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 42 +++++++++++++++++++-------------------- include/trace/events/cachefiles.h | 6 +++--- include/trace/events/netfs.h | 14 ++++++------- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 614f22213e21..a2ca91cb7a68 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -106,7 +106,7 @@ static inline int wait_on_page_fscache_killable(struct page *page) return folio_wait_private_2_killable(page_folio(page)); } -enum netfs_read_source { +enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, @@ -130,8 +130,8 @@ struct netfs_cache_resources { /* * Descriptor for a single component subrequest. */ -struct netfs_read_subrequest { - struct netfs_read_request *rreq; /* Supervising read request */ +struct netfs_io_subrequest { + struct netfs_io_request *rreq; /* Supervising read request */ struct list_head rreq_link; /* Link in rreq->subrequests */ loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ @@ -139,7 +139,7 @@ struct netfs_read_subrequest { refcount_t usage; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ - enum netfs_read_source source; /* Where to read from */ + enum netfs_io_source source; /* Where to read from */ unsigned long flags; #define NETFS_SREQ_WRITE_TO_CACHE 0 /* Set if should write to cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ @@ -152,7 +152,7 @@ struct netfs_read_subrequest { * Descriptor for a read helper request. This is used to make multiple I/O * requests on a variety of sources and then stitch the result together. 
*/ -struct netfs_read_request { +struct netfs_io_request { struct work_struct work; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ @@ -160,8 +160,8 @@ struct netfs_read_request { struct list_head subrequests; /* Requests to fetch I/O from disk or net */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; - atomic_t nr_rd_ops; /* Number of read ops in progress */ - atomic_t nr_wr_ops; /* Number of write ops in progress */ + atomic_t nr_outstanding; /* Number of read ops in progress */ + atomic_t nr_copy_ops; /* Number of write ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ short error; /* 0 or error that occurred */ @@ -176,23 +176,23 @@ struct netfs_read_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ - const struct netfs_read_request_ops *netfs_ops; + const struct netfs_request_ops *netfs_ops; }; /* * Operations the network filesystem can/must provide to the helpers. */ -struct netfs_read_request_ops { +struct netfs_request_ops { bool (*is_cache_enabled)(struct inode *inode); - void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); - int (*begin_cache_operation)(struct netfs_read_request *rreq); - void (*expand_readahead)(struct netfs_read_request *rreq); - bool (*clamp_length)(struct netfs_read_subrequest *subreq); - void (*issue_op)(struct netfs_read_subrequest *subreq); - bool (*is_still_valid)(struct netfs_read_request *rreq); + void (*init_request)(struct netfs_io_request *rreq, struct file *file); + int (*begin_cache_operation)(struct netfs_io_request *rreq); + void (*expand_readahead)(struct netfs_io_request *rreq); + bool (*clamp_length)(struct netfs_io_subrequest *subreq); + void (*issue_op)(struct netfs_io_subrequest *subreq); + bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio *folio, void **_fsdata); - void (*done)(struct netfs_read_request *rreq); + void (*done)(struct netfs_io_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; @@ -235,7 +235,7 @@ struct netfs_cache_ops { /* Prepare a read operation, shortening it to a cached/uncached * boundary as appropriate. 
*/ - enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq, + enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, loff_t i_size); /* Prepare a write operation, working out what part of the write we can @@ -255,19 +255,19 @@ struct netfs_cache_ops { struct readahead_control; extern void netfs_readahead(struct readahead_control *, - const struct netfs_read_request_ops *, + const struct netfs_request_ops *, void *); extern int netfs_readpage(struct file *, struct folio *, - const struct netfs_read_request_ops *, + const struct netfs_request_ops *, void *); extern int netfs_write_begin(struct file *, struct address_space *, loff_t, unsigned int, unsigned int, struct folio **, void **, - const struct netfs_read_request_ops *, + const struct netfs_request_ops *, void *); -extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool); +extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); extern void netfs_stats_show(struct seq_file *); #endif /* _LINUX_NETFS_H */ diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index c6f5aa74db89..002d0ae4f9bc 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -424,8 +424,8 @@ TRACE_EVENT(cachefiles_vol_coherency, ); TRACE_EVENT(cachefiles_prep_read, - TP_PROTO(struct netfs_read_subrequest *sreq, - enum netfs_read_source source, + TP_PROTO(struct netfs_io_subrequest *sreq, + enum netfs_io_source source, enum cachefiles_prepare_read_trace why, ino_t cache_inode), @@ -435,7 +435,7 @@ TRACE_EVENT(cachefiles_prep_read, __field(unsigned int, rreq ) __field(unsigned short, index ) __field(unsigned short, flags ) - __field(enum netfs_read_source, source ) + __field(enum netfs_io_source, source ) __field(enum cachefiles_prepare_read_trace, why ) __field(size_t, len ) __field(loff_t, start ) diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 4d0bf02d490a..b40809c0bd74 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -94,7 +94,7 @@ netfs_failures; #define E_(a, b) { a, b } TRACE_EVENT(netfs_read, - TP_PROTO(struct netfs_read_request *rreq, + TP_PROTO(struct netfs_io_request *rreq, loff_t start, size_t len, enum netfs_read_trace what), @@ -127,7 +127,7 @@ TRACE_EVENT(netfs_read, ); TRACE_EVENT(netfs_rreq, - TP_PROTO(struct netfs_read_request *rreq, + TP_PROTO(struct netfs_io_request *rreq, enum netfs_rreq_trace what), TP_ARGS(rreq, what), @@ -151,7 +151,7 @@ TRACE_EVENT(netfs_rreq, ); TRACE_EVENT(netfs_sreq, - TP_PROTO(struct netfs_read_subrequest *sreq, + TP_PROTO(struct netfs_io_subrequest *sreq, enum netfs_sreq_trace what), TP_ARGS(sreq, what), @@ -161,7 +161,7 @@ TRACE_EVENT(netfs_sreq, __field(unsigned short, index ) __field(short, error ) __field(unsigned short, flags ) - __field(enum netfs_read_source, source ) + __field(enum netfs_io_source, source ) __field(enum netfs_sreq_trace, what ) __field(size_t, len ) __field(size_t, transferred ) @@ -190,8 +190,8 @@ TRACE_EVENT(netfs_sreq, ); TRACE_EVENT(netfs_failure, - TP_PROTO(struct netfs_read_request *rreq, - struct netfs_read_subrequest *sreq, + TP_PROTO(struct netfs_io_request *rreq, + struct netfs_io_subrequest *sreq, int error, enum netfs_failure what), TP_ARGS(rreq, sreq, error, what), @@ -201,7 +201,7 @@ TRACE_EVENT(netfs_failure, __field(unsigned short, index ) __field(short, error ) __field(unsigned short, flags ) - __field(enum netfs_read_source, source ) + __field(enum netfs_io_source, 
source ) __field(enum netfs_failure, what ) __field(size_t, len ) __field(size_t, transferred ) -- cgit v1.2.3 From f18a378580a761c8559b7d90afaa157269559c05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Feb 2022 10:14:32 +0000 Subject: netfs: Finish off rename of netfs_read_request to netfs_io_request Adjust helper function names and comments after mass rename of struct netfs_read_*request to struct netfs_io_*request. Changes ======= ver #2) - Make the changes in the docs also. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622992433.3564931.6684311087845150271.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678196111.1200972.5001114956865989528.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692892567.2099075.13895804222087028813.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index a2ca91cb7a68..f63de27d6f29 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -131,7 +131,7 @@ struct netfs_cache_resources { * Descriptor for a single component subrequest. */ struct netfs_io_subrequest { - struct netfs_io_request *rreq; /* Supervising read request */ + struct netfs_io_request *rreq; /* Supervising I/O request */ struct list_head rreq_link; /* Link in rreq->subrequests */ loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ @@ -139,29 +139,29 @@ struct netfs_io_subrequest { refcount_t usage; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ - enum netfs_io_source source; /* Where to read from */ + enum netfs_io_source source; /* Where to read from/write to */ unsigned long flags; -#define NETFS_SREQ_WRITE_TO_CACHE 0 /* Set if should write to cache */ +#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SHORT_READ 2 /* Set if there was a short read from the cache */ +#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ }; /* - * Descriptor for a read helper request. This is used to make multiple I/O - * requests on a variety of sources and then stitch the result together. + * Descriptor for an I/O helper request. This is used to make multiple I/O + * operations to a variety of data stores and then stitch the result together. 
*/ struct netfs_io_request { struct work_struct work; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct netfs_cache_resources cache_resources; - struct list_head subrequests; /* Requests to fetch I/O from disk or net */ + struct list_head subrequests; /* Contributory I/O operations */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; - atomic_t nr_outstanding; /* Number of read ops in progress */ - atomic_t nr_copy_ops; /* Number of write ops in progress */ + atomic_t nr_outstanding; /* Number of ops in progress */ + atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ short error; /* 0 or error that occurred */ @@ -171,7 +171,7 @@ struct netfs_io_request { refcount_t usage; unsigned long flags; #define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ -#define NETFS_RREQ_WRITE_TO_CACHE 1 /* Need to write to the cache */ +#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ @@ -188,7 +188,7 @@ struct netfs_request_ops { int (*begin_cache_operation)(struct netfs_io_request *rreq); void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); - void (*issue_op)(struct netfs_io_subrequest *subreq); + void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio *folio, void **_fsdata); -- cgit v1.2.3 From 18b3ff9fe8b857557fd02bfae0d91834b2c380ca Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 19 Feb 2022 23:05:40 +0000 Subject: netfs: Adjust the netfs_rreq tracepoint slightly Adjust the netfs_rreq tracepoint to include the origin of the request and to increase the size of the "what trace" output strings by a character so that "ENCRYPT" and "DECRYPT" will fit without abbreviation. 
Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622996715.3564931.4252319907990358129.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678199468.1200972.17275585970238114726.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692898684.2099075.12153225958137716567.stgit@warthog.procyon.org.uk/ # v3 --- include/trace/events/netfs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index b40809c0bd74..0c7a26c4d11c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -22,13 +22,13 @@ E_(netfs_read_trace_write_begin, "WRITEBEGN") #define netfs_rreq_traces \ - EM(netfs_rreq_trace_assess, "ASSESS") \ - EM(netfs_rreq_trace_done, "DONE ") \ - EM(netfs_rreq_trace_free, "FREE ") \ - EM(netfs_rreq_trace_resubmit, "RESUBM") \ - EM(netfs_rreq_trace_unlock, "UNLOCK") \ - EM(netfs_rreq_trace_unmark, "UNMARK") \ - E_(netfs_rreq_trace_write, "WRITE ") + EM(netfs_rreq_trace_assess, "ASSESS ") \ + EM(netfs_rreq_trace_copy, "COPY ") \ + EM(netfs_rreq_trace_done, "DONE ") \ + EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_resubmit, "RESUBMT") \ + EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + E_(netfs_rreq_trace_unmark, "UNMARK ") #define netfs_sreq_sources \ EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ @@ -134,7 +134,7 @@ TRACE_EVENT(netfs_rreq, TP_STRUCT__entry( __field(unsigned int, rreq ) - __field(unsigned short, flags ) + __field(unsigned int, flags ) __field(enum netfs_rreq_trace, what ) ), @@ -182,8 +182,8 @@ TRACE_EVENT(netfs_sreq, TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d", __entry->rreq, __entry->index, - __print_symbolic(__entry->what, netfs_sreq_traces), __print_symbolic(__entry->source, netfs_sreq_sources), + __print_symbolic(__entry->what, netfs_sreq_traces), __entry->flags, __entry->start, __entry->transferred, __entry->len, __entry->error) -- cgit v1.2.3 From de74023befa1876f64bc5871a2a4a51850517118 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Feb 2022 21:13:05 +0000 Subject: netfs: Trace refcounting on the netfs_io_request struct Add refcount tracing for the netfs_io_request structure. Changes ======= ver #3) - Switch 'W=' to 'R=' in the traceline to match other request debug IDs. 
Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622997668.3564931.14456171619219324968.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678200943.1200972.7241495532327787765.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692900920.2099075.11847712419940675791.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 2 +- include/trace/events/netfs.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index f63de27d6f29..541aebe828f3 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -168,7 +168,7 @@ struct netfs_io_request { loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ - refcount_t usage; + refcount_t ref; unsigned long flags; #define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 0c7a26c4d11c..e35a5ce52eb5 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -55,6 +55,15 @@ EM(netfs_fail_short_write_begin, "short-write-begin") \ E_(netfs_fail_prepare_write, "prep-write") +#define netfs_rreq_ref_traces \ + EM(netfs_rreq_trace_get_hold, "GET HOLD ") \ + EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ + EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ + EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ + EM(netfs_rreq_trace_put_hold, "PUT HOLD ") \ + EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ + E_(netfs_rreq_trace_new, "NEW ") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -67,6 +76,7 @@ enum netfs_read_trace { netfs_read_traces } __mode(byte); enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte); enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte); enum netfs_failure { netfs_failures } __mode(byte); +enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); #endif @@ -83,6 +93,7 @@ netfs_rreq_traces; netfs_sreq_sources; netfs_sreq_traces; netfs_failures; +netfs_rreq_ref_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -229,6 +240,30 @@ TRACE_EVENT(netfs_failure, __entry->error) ); +TRACE_EVENT(netfs_rreq_ref, + TP_PROTO(unsigned int rreq_debug_id, int ref, + enum netfs_rreq_ref_trace what), + + TP_ARGS(rreq_debug_id, ref, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(int, ref ) + __field(enum netfs_rreq_ref_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq_debug_id; + __entry->ref = ref; + __entry->what = what; + ), + + TP_printk("R=%08x %s r=%u", + __entry->rreq, + __print_symbolic(__entry->what, netfs_rreq_ref_traces), + __entry->ref) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ -- cgit v1.2.3 From 6cd3d6fd1fe2feae5ff9f6a821081569bb140bf4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Feb 2022 15:01:24 +0000 Subject: netfs: Trace refcounting on the netfs_io_subrequest struct Add refcount tracing for the netfs_io_subrequest structure. Changes ======= ver #3) - Switch 'W=' to 'R=' in the traceline to match other request debug IDs. 
Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622998584.3564931.5052255990645723639.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678202603.1200972.14726007419792315578.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692901860.2099075.4845820886851239935.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 8 +++++++- include/trace/events/netfs.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 541aebe828f3..c702bd8ea8da 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -18,6 +18,8 @@ #include #include +enum netfs_sreq_ref_trace; + /* * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache @@ -136,7 +138,7 @@ struct netfs_io_subrequest { loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ - refcount_t usage; + refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ enum netfs_io_source source; /* Where to read from/write to */ @@ -268,6 +270,10 @@ extern int netfs_write_begin(struct file *, struct address_space *, void *); extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +extern void netfs_get_subrequest(struct netfs_io_subrequest *subreq, + enum netfs_sreq_ref_trace what); +extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq, + bool was_async, enum netfs_sreq_ref_trace what); extern void netfs_stats_show(struct seq_file *); #endif /* _LINUX_NETFS_H */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index e35a5ce52eb5..dcea5e888fd0 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -64,6 +64,17 @@ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ E_(netfs_rreq_trace_new, "NEW ") +#define netfs_sreq_ref_traces \ + EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ + EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ + EM(netfs_sreq_trace_new, "NEW ") \ + EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ + EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ + EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ + EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \ + E_(netfs_sreq_trace_put_terminated, "PUT TERM ") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -77,6 +88,7 @@ enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte); enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte); enum netfs_failure { netfs_failures } __mode(byte); enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); +enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); #endif @@ -94,6 +106,7 @@ netfs_sreq_sources; netfs_sreq_traces; netfs_failures; netfs_rreq_ref_traces; +netfs_sreq_ref_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -264,6 +277,33 @@ TRACE_EVENT(netfs_rreq_ref, __entry->ref) ); +TRACE_EVENT(netfs_sreq_ref, + TP_PROTO(unsigned int rreq_debug_id, unsigned int subreq_debug_index, + int ref, enum netfs_sreq_ref_trace what), + + TP_ARGS(rreq_debug_id, subreq_debug_index, ref, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + 
__field(unsigned int, subreq ) + __field(int, ref ) + __field(enum netfs_sreq_ref_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq_debug_id; + __entry->subreq = subreq_debug_index; + __entry->ref = ref; + __entry->what = what; + ), + + TP_printk("R=%08x[%x] %s r=%u", + __entry->rreq, + __entry->subreq, + __print_symbolic(__entry->what, netfs_sreq_ref_traces), + __entry->ref) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ -- cgit v1.2.3 From 5c88705e2aeaee5521b8586e68bef47aab359914 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Feb 2022 16:24:03 +0000 Subject: netfs: Adjust the netfs_failure tracepoint to indicate non-subreq lines Adjust the netfs_failure tracepoint to indicate a subrequest number of -1 when it's a full-request failure unrelated to any particular subrequest, such as a failure to encrypt its data buffer. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164623001948.3564931.2353852999649380059.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678204587.1200972.14893513018190383961.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692903233.2099075.15414355147237641274.stgit@warthog.procyon.org.uk/ # v3 --- include/trace/events/netfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index dcea5e888fd0..556859b0f107 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -222,7 +222,7 @@ TRACE_EVENT(netfs_failure, TP_STRUCT__entry( __field(unsigned int, rreq ) - __field(unsigned short, index ) + __field(short, index ) __field(short, error ) __field(unsigned short, flags ) __field(enum netfs_io_source, source ) @@ -234,17 +234,17 @@ TRACE_EVENT(netfs_failure, TP_fast_assign( __entry->rreq = rreq->debug_id; - __entry->index = sreq ? sreq->debug_index : 0; + __entry->index = sreq ? sreq->debug_index : -1; __entry->error = error; __entry->flags = sreq ? sreq->flags : 0; __entry->source = sreq ? sreq->source : NETFS_INVALID_READ; __entry->what = what; - __entry->len = sreq ? sreq->len : 0; + __entry->len = sreq ? sreq->len : rreq->len; __entry->transferred = sreq ? sreq->transferred : 0; __entry->start = sreq ? sreq->start : 0; ), - TP_printk("R=%08x[%u] %s f=%02x s=%llx %zx/%zx %s e=%d", + TP_printk("R=%08x[%d] %s f=%02x s=%llx %zx/%zx %s e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __entry->flags, -- cgit v1.2.3 From 663dfb65c3b3ea4b8e1944680352992d58f3aa22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Aug 2021 09:24:42 -0400 Subject: netfs: Refactor arguments for netfs_alloc_read_request Pass start and len to the rreq allocator. This should ensure that the fields are set so that ->init_request() can use them. Also add a parameter to indicates the origin of the request. Ceph can use this to tell whether to get caps. Changes ======= ver #3) - Change the author to me as Jeff feels that most of the patch is my changes now. ver #2) - Show the request origin in the netfs_rreq tracepoint. 
Signed-off-by: Jeff Layton Co-developed-by: David Howells Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164622989020.3564931.17517006047854958747.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678208569.1200972.12153682697842916557.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692904155.2099075.14717645623034355995.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 7 +++++++ include/trace/events/netfs.h | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c702bd8ea8da..7dc741d9b21b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,12 @@ struct netfs_io_subrequest { #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ }; +enum netfs_io_origin { + NETFS_READAHEAD, /* This read was triggered by readahead */ + NETFS_READPAGE, /* This read is a synchronous read */ + NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ +} __mode(byte); + /* * Descriptor for an I/O helper request. This is used to make multiple I/O * operations to a variety of data stores and then stitch the result together. @@ -167,6 +173,7 @@ struct netfs_io_request { size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ short error; /* 0 or error that occurred */ + enum netfs_io_origin origin; /* Origin of the request */ loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 556859b0f107..f00e3e1821c8 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -21,6 +21,11 @@ EM(netfs_read_trace_readpage, "READPAGE ") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") +#define netfs_rreq_origins \ + EM(NETFS_READAHEAD, "RA") \ + EM(NETFS_READPAGE, "RP") \ + E_(NETFS_READ_FOR_WRITE, "RW") + #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ EM(netfs_rreq_trace_copy, "COPY ") \ @@ -101,6 +106,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); #define E_(a, b) TRACE_DEFINE_ENUM(a); netfs_read_traces; +netfs_rreq_origins; netfs_rreq_traces; netfs_sreq_sources; netfs_sreq_traces; @@ -159,17 +165,20 @@ TRACE_EVENT(netfs_rreq, TP_STRUCT__entry( __field(unsigned int, rreq ) __field(unsigned int, flags ) + __field(enum netfs_io_origin, origin ) __field(enum netfs_rreq_trace, what ) ), TP_fast_assign( __entry->rreq = rreq->debug_id; __entry->flags = rreq->flags; + __entry->origin = rreq->origin; __entry->what = what; ), - TP_printk("R=%08x %s f=%02x", + TP_printk("R=%08x %s %s f=%02x", __entry->rreq, + __print_symbolic(__entry->origin, netfs_rreq_origins), __print_symbolic(__entry->what, netfs_rreq_traces), __entry->flags) ); -- cgit v1.2.3 From 2de160417315b8d64455fe03e9bb7d3308ac3281 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jan 2022 21:55:46 +0000 Subject: netfs: Change ->init_request() to return an error code Change the request initialisation function to return an error code so that the network filesystem can return a failure (ENOMEM, for example). 
This will also allow ceph to abort a ->readahead() op if the server refuses to give it a cap allowing local caching from within the netfslib framework (errors aren't passed back through ->readahead(), so returning, say, -ENOBUFS will cause the op to be aborted). Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164678212401.1200972.16537041523832944934.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692905398.2099075.5238033621684646524.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 7dc741d9b21b..4b99e38f73d9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -193,7 +193,7 @@ struct netfs_io_request { */ struct netfs_request_ops { bool (*is_cache_enabled)(struct inode *inode); - void (*init_request)(struct netfs_io_request *rreq, struct file *file); + int (*init_request)(struct netfs_io_request *rreq, struct file *file); int (*begin_cache_operation)(struct netfs_io_request *rreq); void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); -- cgit v1.2.3 From bc899ee1c898e520574ff4d99356eb2e724a9265 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Jun 2021 22:37:05 +0100 Subject: netfs: Add a netfs inode context Add a netfs_i_context struct that should be included in the network filesystem's own inode struct wrapper, directly after the VFS's inode struct, e.g.: struct my_inode { struct { /* These must be contiguous */ struct inode vfs_inode; struct netfs_i_context netfs_ctx; }; }; The netfs_i_context struct so far contains a single field for the network filesystem to use - the cache cookie: struct netfs_i_context { ... struct fscache_cookie *cache; }; Three functions are provided to help with this: (1) void netfs_i_context_init(struct inode *inode, const struct netfs_request_ops *ops); Initialise the netfs context and set the operations. (2) struct netfs_i_context *netfs_i_context(struct inode *inode); Find the netfs context from the VFS inode. (3) struct inode *netfs_inode(struct netfs_i_context *ctx); Find the VFS inode from the netfs context. Changes ======= ver #4) - Fix netfs_is_cache_enabled() to check cookie->cache_priv to see if a cache is present[3]. - Fix netfs_skip_folio_read() to zero out all of the page, not just some of it[3]. ver #3) - Split out the bit to move ceph cap-getting on readahead into ceph_init_request()[1]. - Stick in a comment to the netfs inode structs indicating the contiguity requirements[2]. ver #2) - Adjust documentation to match. - Use "#if IS_ENABLED()" in netfs_i_cookie(), not "#ifdef". - Move the cap check from ceph_readahead() to ceph_init_request() to be called from netfslib. - Remove ceph_readahead() and use netfs_readahead() directly instead. 
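For illustration (the filesystem name, ops table and cookie lookup below are hypothetical), wiring an inode up might look like:

	static void myfs_set_netfs_context(struct inode *inode)
	{
		/* netfs_i_context sits directly after the VFS inode in myfs_inode */
		netfs_i_context_init(inode, &myfs_req_ops);
	#if IS_ENABLED(CONFIG_FSCACHE)
		netfs_i_context(inode)->cache = myfs_lookup_cookie(inode);
	#endif
	}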
Signed-off-by: David Howells Acked-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/8af0d47f17d89c06bbf602496dd845f2b0bf25b3.camel@kernel.org/ [1] Link: https://lore.kernel.org/r/beaf4f6a6c2575ed489adb14b257253c868f9a5c.camel@kernel.org/ [2] Link: https://lore.kernel.org/r/3536452.1647421585@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/164622984545.3564931.15691742939278418580.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678213320.1200972.16807551936267647470.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692909854.2099075.9535537286264248057.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/306388.1647595110@warthog.procyon.org.uk/ # v4 --- include/linux/netfs.h | 81 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 4b99e38f73d9..8458b30172a5 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -118,6 +118,16 @@ enum netfs_io_source { typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, bool was_async); +/* + * Per-inode description. This must be directly after the inode struct. + */ +struct netfs_i_context { + const struct netfs_request_ops *ops; +#if IS_ENABLED(CONFIG_FSCACHE) + struct fscache_cookie *cache; +#endif +}; + /* * Resources required to do operations on a cache. */ @@ -192,7 +202,6 @@ struct netfs_io_request { * Operations the network filesystem can/must provide to the helpers. */ struct netfs_request_ops { - bool (*is_cache_enabled)(struct inode *inode); int (*init_request)(struct netfs_io_request *rreq, struct file *file); int (*begin_cache_operation)(struct netfs_io_request *rreq); void (*expand_readahead)(struct netfs_io_request *rreq); @@ -263,18 +272,11 @@ struct netfs_cache_ops { }; struct readahead_control; -extern void netfs_readahead(struct readahead_control *, - const struct netfs_request_ops *, - void *); -extern int netfs_readpage(struct file *, - struct folio *, - const struct netfs_request_ops *, - void *); +extern void netfs_readahead(struct readahead_control *); +extern int netfs_readpage(struct file *, struct page *); extern int netfs_write_begin(struct file *, struct address_space *, loff_t, unsigned int, unsigned int, struct folio **, - void **, - const struct netfs_request_ops *, - void *); + void **); extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); extern void netfs_get_subrequest(struct netfs_io_subrequest *subreq, @@ -283,4 +285,61 @@ extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); extern void netfs_stats_show(struct seq_file *); +/** + * netfs_i_context - Get the netfs inode context from the inode + * @inode: The inode to query + * + * Get the netfs lib inode context from the network filesystem's inode. The + * context struct is expected to directly follow on from the VFS inode struct. + */ +static inline struct netfs_i_context *netfs_i_context(struct inode *inode) +{ + return (struct netfs_i_context *)(inode + 1); +} + +/** + * netfs_inode - Get the netfs inode from the inode context + * @ctx: The context to query + * + * Get the netfs inode from the netfs library's inode context. The VFS inode + * is expected to directly precede the context struct. 
+ */ +static inline struct inode *netfs_inode(struct netfs_i_context *ctx) +{ + return ((struct inode *)ctx) - 1; +} + +/** + * netfs_i_context_init - Initialise a netfs lib context + * @inode: The inode with which the context is associated + * @ops: The netfs's operations list + * + * Initialise the netfs library context struct. This is expected to follow on + * directly from the VFS inode struct. + */ +static inline void netfs_i_context_init(struct inode *inode, + const struct netfs_request_ops *ops) +{ + struct netfs_i_context *ctx = netfs_i_context(inode); + + memset(ctx, 0, sizeof(*ctx)); + ctx->ops = ops; +} + +/** + * netfs_i_cookie - Get the cache cookie from the inode + * @inode: The inode to query + * + * Get the caching cookie (if enabled) from the network filesystem's inode. + */ +static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode) +{ +#if IS_ENABLED(CONFIG_FSCACHE) + struct netfs_i_context *ctx = netfs_i_context(inode); + return ctx->cache; +#else + return NULL; +#endif +} + #endif /* _LINUX_NETFS_H */ -- cgit v1.2.3 From 4090b31422a6f24dfe701e31ffec7ba5804a7e2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Mar 2022 10:50:22 +0000 Subject: netfs: Add a function to consolidate beginning a read Add a function to do the steps needed to begin a read request, allowing this code to be removed from several other functions and consolidated. Changes ======= ver #2) - Move before the unstaticking patch so that some functions can be left static. - Set uninitialised return code in netfs_begin_read()[1][2]. - Fixed a refleak caused by non-removal of a get from netfs_write_begin() when the request submission code got moved to netfs_begin_read(). - Use INIT_WORK() to (re-)init the request work_struct[3]. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/20220303163826.1120936-1-nathan@kernel.org/ [1] Link: https://lore.kernel.org/r/20220303235647.1297171-1-colin.i.king@gmail.com/ [2] Link: https://lore.kernel.org/r/9d69be49081bccff44260e4c6e0049c63d6d04a1.camel@redhat.com/ [3] Link: https://lore.kernel.org/r/164623004355.3564931.7275693529042495641.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678214287.1200972.16734134007649832160.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692911113.2099075.1060868473229451371.stgit@warthog.procyon.org.uk/ # v3 --- include/trace/events/netfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f00e3e1821c8..beec534cbaab 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -56,17 +56,18 @@ EM(netfs_fail_check_write_begin, "check-write-begin") \ EM(netfs_fail_copy_to_cache, "copy-to-cache") \ EM(netfs_fail_read, "read") \ - EM(netfs_fail_short_readpage, "short-readpage") \ - EM(netfs_fail_short_write_begin, "short-write-begin") \ + EM(netfs_fail_short_read, "short-read") \ E_(netfs_fail_prepare_write, "prep-write") #define netfs_rreq_ref_traces \ EM(netfs_rreq_trace_get_hold, "GET HOLD ") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ + EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_rreq_trace_put_hold, "PUT HOLD ") \ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ + EM(netfs_rreq_trace_put_zero_len, "PUT ZEROLEN") \ E_(netfs_rreq_trace_new, "NEW ") #define 
netfs_sreq_ref_traces \ -- cgit v1.2.3 From 4058f742105ecfcbdf99e1139e6c1f74fb8e6db9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 5 Nov 2021 13:55:38 +0000 Subject: netfs: Keep track of the actual remote file size Provide a place in which to keep track of the actual remote file size in the netfs context. This is needed because inode->i_size will be updated as we buffer writes in the pagecache, but the server file size won't get updated until we flush them back. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164623013727.3564931.17659955636985232717.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/164678219305.1200972.6459431995188365134.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/164692921865.2099075.5310757978508056134.stgit@warthog.procyon.org.uk/ # v3 --- include/linux/netfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 8458b30172a5..c7bf1eaf51d5 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -126,6 +126,7 @@ struct netfs_i_context { #if IS_ENABLED(CONFIG_FSCACHE) struct fscache_cookie *cache; #endif + loff_t remote_i_size; /* Size of the remote file */ }; /* @@ -324,6 +325,21 @@ static inline void netfs_i_context_init(struct inode *inode, memset(ctx, 0, sizeof(*ctx)); ctx->ops = ops; + ctx->remote_i_size = i_size_read(inode); +} + +/** + * netfs_resize_file - Note that a file got resized + * @inode: The inode being resized + * @new_i_size: The new file size + * + * Inform the netfs lib that a file got resized so that it can adjust its state. + */ +static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size) +{ + struct netfs_i_context *ctx = netfs_i_context(inode); + + ctx->remote_i_size = new_i_size; } /** -- cgit v1.2.3 From 54f586a9153201c6cff55e1f561990c78bd99aa7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Mar 2022 21:27:51 +0100 Subject: rfkill: make new event layout opt-in Again new complaints surfaced that we had broken the ABI here, although previously all the userspace tools had agreed that it was their mistake and fixed it. Yet now there are cases (e.g. RHEL) that want to run old userspace with newer kernels, and thus are broken. Since this is a bit of a whack-a-mole thing, change the whole extensibility scheme of rfkill to no longer just rely on the message lengths, but instead require userspace to opt in via a new ioctl to a given maximum event size that it is willing to understand. By default, set that to RFKILL_EVENT_SIZE_V1 (8), so that the behaviour for userspace not calling the ioctl will look as if it's just running on an older kernel. Fixes: 14486c82612a ("rfkill: add a reason to the HW rfkill state") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220316212749.16491491b270.Ifcb1950998330a596f29a2a162e00b7546a1d6d0@changeid --- include/uapi/linux/rfkill.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 9b77cfc42efa..283c5a7b3f2c 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -159,8 +159,16 @@ struct rfkill_event_ext { * old behaviour for all userspace, unless it explicitly opts in to the * rules outlined here by using the new &struct rfkill_event_ext. 
* - * Userspace using &struct rfkill_event_ext must adhere to the following - * rules + * Additionally, some other userspace (bluez, g-s-d) was reading with a + * large size but as streaming reads rather than message-based, or with + * too strict checks for the returned size. So eventually, we completely + * reverted this, and extended messages need to be opted in to by using + * an ioctl: + * + * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); + * + * Userspace using &struct rfkill_event_ext and the ioctl must adhere to + * the following rules: * * 1. accept short writes, optionally using them to detect that it's * running on an older kernel; @@ -175,6 +183,8 @@ struct rfkill_event_ext { #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) +#define RFKILL_IOC_MAX_SIZE 2 +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32) /* and that's all userspace gets */ -- cgit v1.2.3 From f58c252e30cf74f68b0054293adc03b5923b9f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 14 Mar 2022 11:14:32 +0200 Subject: serial: 8250: fix XOFF/XON sending when DMA is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 8250 UART is using DMA, x_char (XON/XOFF) is never sent to the wire. After this change, x_char is injected correctly. Create uart_xchar_out() helper for sending the x_char out and accounting related to it. It seems that almost every driver does these same steps with x_char. Except for 8250, however, almost all currently lack .serial_out so they cannot immediately take advantage of this new helper. The downside of this patch is that it might reintroduce the problems some devices faced with mixed DMA/non-DMA transfer which caused revert f967fc8f165f (Revert "serial: 8250_dma: don't bother DMA with small transfers"). However, the impact should be limited to cases with XON/XOFF (that didn't work with DMA capable devices to begin with so this problem is not very likely to cause a major issue, if any at all). Fixes: 9ee4b83e51f74 ("serial: 8250: Add support for dmaengine") Reported-by: Gilles Buloz Tested-by: Gilles Buloz Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220314091432.4288-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 14ae35f68abb..d4828e69087a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -458,6 +458,8 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); +void uart_xchar_out(struct uart_port *uport, int offset); + #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) -- cgit v1.2.3 From 73799a889262b4675799bec20a2765be6d6a3f98 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 15 Mar 2022 15:38:54 -0400 Subject: counter: add new COUNTER_EVENT_CHANGE_OF_STATE Add new counter event to notify user space about every new counter pulse. 
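A hedged userspace sketch of consuming the new event via the counter character device (the ioctls and structures referenced here belong to the pre-existing counter UAPI, not to this patch, and the channel number is a placeholder):

	struct counter_watch watch = {
		.component.type		= COUNTER_COMPONENT_NONE,
		.component.scope	= COUNTER_SCOPE_COUNT,
		.component.parent	= 0,
		.event			= COUNTER_EVENT_CHANGE_OF_STATE,
		.channel		= 0,
	};
	struct counter_event ev;

	ioctl(fd, COUNTER_ADD_WATCH_IOCTL, &watch);
	ioctl(fd, COUNTER_ENABLE_EVENTS_IOCTL);
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev))
		;	/* one counter_event per counter pulse */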
Link: https://lore.kernel.org/r/20220203135727.2374052-2-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/486a5de67414470449efb84d06a2f2214f4bb31d.1647373009.git.vilhelm.gray@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index d0aa95aeff7b..96c5ffd368ad 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -61,6 +61,8 @@ enum counter_event_type { COUNTER_EVENT_THRESHOLD, /* Index signal detected */ COUNTER_EVENT_INDEX, + /* State of counter is changed */ + COUNTER_EVENT_CHANGE_OF_STATE, }; /** -- cgit v1.2.3 From 5c1b97c7d7b736e6439af4f43a65837bc72f56c1 Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:52 +0000 Subject: misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP Add support for IOCTL requests to map and unmap on DSP based on map flags. Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 0a89f95463f6..d248eeb20e67 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -13,6 +13,37 @@ #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +#define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) +#define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) + +/** + * enum fastrpc_map_flags - control flags for mapping memory on DSP user process + * @FASTRPC_MAP_STATIC: Map memory pages with RW- permission and CACHE WRITEBACK. + * The driver is responsible for cache maintenance when passed + * the buffer to FastRPC calls. Same virtual address will be + * assigned for subsequent FastRPC calls. + * @FASTRPC_MAP_RESERVED: Reserved + * @FASTRPC_MAP_FD: Map memory pages with RW- permission and CACHE WRITEBACK. + * Mapping tagged with a file descriptor. User is responsible for + * CPU and DSP cache maintenance for the buffer. Get virtual address + * of buffer on DSP using HAP_mmap_get() and HAP_mmap_put() APIs. + * @FASTRPC_MAP_FD_DELAYED: Mapping delayed until user call HAP_mmap() and HAP_munmap() + * functions on DSP. It is useful to map a buffer with cache modes + * other than default modes. User is responsible for CPU and DSP + * cache maintenance for the buffer. + * @FASTRPC_MAP_FD_NOMAP: This flag is used to skip CPU mapping, + * otherwise behaves similar to FASTRPC_MAP_FD_DELAYED flag. 
+ * @FASTRPC_MAP_MAX: max count for flags + * + */ +enum fastrpc_map_flags { + FASTRPC_MAP_STATIC = 0, + FASTRPC_MAP_RESERVED, + FASTRPC_MAP_FD = 2, + FASTRPC_MAP_FD_DELAYED, + FASTRPC_MAP_FD_NOMAP = 16, + FASTRPC_MAP_MAX, +}; struct fastrpc_invoke_args { __u64 ptr; @@ -49,9 +80,29 @@ struct fastrpc_req_mmap { __u64 vaddrout; /* dsp virtual address */ }; +struct fastrpc_mem_map { + __s32 version; + __s32 fd; /* fd */ + __s32 offset; /* buffer offset */ + __u32 flags; /* flags defined in enum fastrpc_map_flags */ + __u64 vaddrin; /* buffer virtual address */ + __u64 length; /* buffer length */ + __u64 vaddrout; /* [out] remote virtual address */ + __s32 attrs; /* buffer attributes used for SMMU mapping */ + __s32 reserved[4]; +}; + struct fastrpc_req_munmap { __u64 vaddrout; /* address to unmap */ __u64 size; /* size */ }; +struct fastrpc_mem_unmap { + __s32 vesion; + __s32 fd; /* fd */ + __u64 vaddr; /* remote process (dsp) virtual address */ + __u64 length; /* buffer size */ + __s32 reserved[5]; +}; + #endif /* __QCOM_FASTRPC_H__ */ -- cgit v1.2.3 From 6c16fd8bdd4058d4a6aaca9d5a7b40e4cb281d5a Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:53 +0000 Subject: misc: fastrpc: Add support to get DSP capabilities Add support to get DSP capabilities. The capability information is cached on driver. Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index d248eeb20e67..7cc9d342078a 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -15,6 +15,7 @@ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) +#define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability) /** * enum fastrpc_map_flags - control flags for mapping memory on DSP user process @@ -105,4 +106,11 @@ struct fastrpc_mem_unmap { __s32 reserved[5]; }; +struct fastrpc_ioctl_capability { + __u32 domain; + __u32 attribute_id; + __u32 capability; /* dsp capability */ + __u32 reserved[4]; +}; + #endif /* __QCOM_FASTRPC_H__ */ -- cgit v1.2.3 From 7f1f481263c3ce5387d4fd5ad63ddaa8a295aab2 Mon Sep 17 00:00:00 2001 From: Jeya R Date: Mon, 14 Feb 2022 16:09:56 +0000 Subject: misc: fastrpc: check before loading process to the DSP Reject session if DSP domain is secure, device node is non-secure and signed PD is requested. Secure device node can access DSP without any restriction. Unsigned PD offload is only allowed for the DSP domain that can support unsigned offloading. 
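Purely as illustration (the create ioctl and structure named below are part of the pre-existing fastrpc UAPI and are assumptions here, not part of this patch), a non-secure caller asking for an unsigned PD sets the corresponding attribute and is rejected unless the DSP domain allows it:

	struct fastrpc_init_create create = {
		/* ... fields describing the process image ... */
		.attrs = FASTRPC_MODE_UNSIGNED_MODULE,	/* request an unsigned PD */
	};

	ret = ioctl(fd, FASTRPC_IOCTL_INIT_CREATE, &create);
	/* fails if unsigned offload is not supported on this DSP domain */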
Signed-off-by: Jeya R Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 7cc9d342078a..f39edac20305 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -46,6 +46,23 @@ enum fastrpc_map_flags { FASTRPC_MAP_MAX, }; +enum fastrpc_proc_attr { + /* Macro for Debug attr */ + FASTRPC_MODE_DEBUG = (1 << 0), + /* Macro for Ptrace */ + FASTRPC_MODE_PTRACE = (1 << 1), + /* Macro for CRC Check */ + FASTRPC_MODE_CRC = (1 << 2), + /* Macro for Unsigned PD */ + FASTRPC_MODE_UNSIGNED_MODULE = (1 << 3), + /* Macro for Adaptive QoS */ + FASTRPC_MODE_ADAPTIVE_QOS = (1 << 4), + /* Macro for System Process */ + FASTRPC_MODE_SYSTEM_PROCESS = (1 << 5), + /* Macro for Prvileged Process */ + FASTRPC_MODE_PRIVILEGED = (1 << 6), +}; + struct fastrpc_invoke_args { __u64 ptr; __u64 length; -- cgit v1.2.3 From e90d911906196bf987492c94e38f10ca611dfd7b Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Gattupalli Date: Mon, 14 Feb 2022 16:09:58 +0000 Subject: misc: fastrpc: Add support to secure memory map This patch adds support to secure memory allocations for DSP. It repurposes the reserved field in struct fastrpc_invoke_args to add attributes to invoke request, for example to setup a secure memory map for dsp. Secure memory is assigned to DSP Virtual Machine IDs using Qualcomm SCM calls. Signed-off-by: Vamsi Krishna Gattupalli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220214161002.6831-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f39edac20305..5e29f2cfa42d 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -63,11 +63,14 @@ enum fastrpc_proc_attr { FASTRPC_MODE_PRIVILEGED = (1 << 6), }; +/* Fastrpc attribute for memory protection of buffers */ +#define FASTRPC_ATTR_SECUREMAP (1) + struct fastrpc_invoke_args { __u64 ptr; __u64 length; __s32 fd; - __u32 reserved; + __u32 attr; }; struct fastrpc_invoke { -- cgit v1.2.3 From 336d4b814bf078fa698488632c19beca47308896 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:15:32 -0600 Subject: ptrace: Move setting/clearing ptrace_message into ptrace_stop Today ptrace_message is easy to overlook as it not a core part of ptrace_stop. It has been overlooked so much that there are places that set ptrace_message and don't clear it, and places that never set it. So if you get an unlucky sequence of events the ptracer may be able to read a ptrace_message that does not apply to the current ptrace stop. Move setting of ptrace_message into ptrace_stop so that it always gets set before the stop, and always gets cleared after the stop. This prevents non-sense from being reported to userspace and makes ptrace_message more visible in the ptrace helper functions so that kernel developers can see it. Link: https://lkml.kernel.org/r/87bky67qfv.fsf_-_@email.froward.int.ebiederm.org Acked-by: Oleg Nesterov Reviewed-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/ptrace.h | 9 +++------ include/uapi/linux/ptrace.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 5310f43e4762..3e6b46e2b7be 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned extern void ptrace_disable(struct task_struct *); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern void ptrace_notify(int exit_code); +extern void ptrace_notify(int exit_code, unsigned long message); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred); @@ -155,8 +155,7 @@ static inline bool ptrace_event_enabled(struct task_struct *task, int event) static inline void ptrace_event(int event, unsigned long message) { if (unlikely(ptrace_event_enabled(current, event))) { - current->ptrace_message = message; - ptrace_notify((event << 8) | SIGTRAP); + ptrace_notify((event << 8) | SIGTRAP, message); } else if (event == PTRACE_EVENT_EXEC) { /* legacy EXEC report via SIGTRAP */ if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) @@ -424,8 +423,7 @@ static inline int ptrace_report_syscall(unsigned long message) if (!(ptrace & PT_PTRACED)) return 0; - current->ptrace_message = message; - ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message); /* * this isn't the same as continuing with a signal, but it will do @@ -437,7 +435,6 @@ static inline int ptrace_report_syscall(unsigned long message) current->exit_code = 0; } - current->ptrace_message = 0; return fatal_signal_pending(current); } diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index b7af92e07d1f..195ae64a8c87 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by ptrace_report_syscall_* to describe the current syscall-stop. + * by ptrace_stop to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From 6487d1dab837214ec2fd3f0ddd5f787e63be7c20 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:19:13 -0600 Subject: ptrace: Return the signal to continue with from ptrace_stop The signal a task should continue with after a ptrace stop is inconsistently read, cleared, and sent. Solve this by reading and clearing the signal to be sent in ptrace_stop. In an ideal world everything except ptrace_signal would share a common implementation of continuing with the signal, so ptracers could count on the signal they ask to continue with actually being delivered. For now retain bug compatibility and just return with the signal number the ptracer requested the code continue with. Link: https://lkml.kernel.org/r/875yoe7qdp.fsf_-_@email.froward.int.ebiederm.org Reviewed-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/ptrace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 3e6b46e2b7be..15b3d176b6b4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -60,7 +60,7 @@ extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned extern void ptrace_disable(struct task_struct *); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern void ptrace_notify(int exit_code, unsigned long message); +extern int ptrace_notify(int exit_code, unsigned long message); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred); @@ -419,21 +419,21 @@ extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oa static inline int ptrace_report_syscall(unsigned long message) { int ptrace = current->ptrace; + int signr; if (!(ptrace & PT_PTRACED)) return 0; - ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message); + signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), + message); /* * this isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. -brl */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + if (signr) + send_sig(signr, current, 1); return fatal_signal_pending(current); } -- cgit v1.2.3 From 0eaecfb2e4814d51ab172df3823e35d7c488b6d2 Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Mon, 7 Mar 2022 21:04:44 +0100 Subject: Bluetooth: hci_sync: Add a new quirk to skip HCI_FLT_CLEAR_ALL Some controllers have problems with being sent a command to clear all filtering. While the HCI code does not unconditionally send a clear-all anymore at BR/EDR setup (after the state machine refactor), there might be more ways of hitting these codepaths in the future as the kernel develops. Cc: stable@vger.kernel.org Cc: Hans de Goede Signed-off-by: Ismael Ferreras Morezuelas Reviewed-by: Hans de Goede Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 35c073d44ec5..5cb095b09a94 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -255,6 +255,16 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, + + /* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with + * HCI_FLT_CLEAR_ALL are ignored and event filtering is + * completely avoided. A subset of the CSR controller + * clones struggle with this and instantly lock up. + * + * Note that devices using this must (separately) disable + * runtime suspend, because event filtering takes place there. + */ + HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, }; /* HCI device flags */ -- cgit v1.2.3 From da8912176fb0ff9fd60e14fa653108d96422b896 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Mon, 14 Mar 2022 15:42:52 -0700 Subject: Bluetooth: fix incorrect nonblock bitmask in bt_sock_wait_ready() Callers pass msg->msg_flags as flags, which contains MSG_DONTWAIT instead of O_NONBLOCK. 
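A minimal sketch of how an affected driver opts in (the detection context is assumed; quirks are simply bits on the hci_dev):

	/* e.g. from a vendor setup callback once a fragile CSR clone is identified */
	set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks);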
Signed-off-by: Gavin Li Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 2aa5e95808a5..6b48d9e2aab9 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -345,7 +345,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); -int bt_sock_wait_ready(struct sock *sk, unsigned long flags); +int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); -- cgit v1.2.3 From 86fc59ef818beb0e1945d17f8e734898baba7e4e Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 13 Mar 2022 15:45:23 -0700 Subject: regmap: add configurable downshift for addresses Add an additional reg_downshift to be applied to register addresses before any register accesses. An example of a device that uses this is a VSC7514 chip, which require each register address to be downshifted by two if the access is performed over a SPI bus. Signed-off-by: Colin Foster Link: https://lore.kernel.org/r/20220313224524.399947-2-colin.foster@in-advantage.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 22652e5fbc38..40fb9399add6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -237,6 +237,8 @@ typedef void (*regmap_unlock)(void *); * @reg_stride: The register address stride. Valid register addresses are a * multiple of this value. If set to 0, a value of 1 will be * used. + * @reg_downshift: The number of bits to downshift the register before + * performing any operations. * @pad_bits: Number of bits of padding between register and value. * @val_bits: Number of bits in a register value, mandatory. * @@ -360,6 +362,7 @@ struct regmap_config { int reg_bits; int reg_stride; + int reg_downshift; int pad_bits; int val_bits; -- cgit v1.2.3 From 0074f3f2b1e43d3cedd97e47fb6980db6d2ba79e Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 13 Mar 2022 15:45:24 -0700 Subject: regmap: allow a defined reg_base to be added to every address There's an inconsistency that arises when a register set can be accessed internally via MMIO, or externally via SPI. The VSC7514 chip allows both modes of operation. When internally accessed, the system utilizes __iomem, devm_ioremap_resource, and devm_regmap_init_mmio. For SPI it isn't possible to utilize memory-mapped IO. To properly operate, the resource base must be added to the register before every operation. Signed-off-by: Colin Foster Link: https://lore.kernel.org/r/20220313224524.399947-3-colin.foster@in-advantage.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 40fb9399add6..de81a94d7b30 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -239,6 +239,8 @@ typedef void (*regmap_unlock)(void *); * used. * @reg_downshift: The number of bits to downshift the register before * performing any operations. 
+ * @reg_base: Value to be added to every register address before performing any + * operation. * @pad_bits: Number of bits of padding between register and value. * @val_bits: Number of bits in a register value, mandatory. * @@ -363,6 +365,7 @@ struct regmap_config { int reg_bits; int reg_stride; int reg_downshift; + unsigned int reg_base; int pad_bits; int val_bits; -- cgit v1.2.3 From 046e1537a3cf0adc68fe865b5dc9a7e731cc63b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 15 Mar 2022 10:18:32 +0100 Subject: net: set default rss queues num to physical cores / 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Network drivers can call to netif_get_num_default_rss_queues to get the default number of receive queues to use. Right now, this default number is min(8, num_online_cpus()). Instead, as suggested by Jakub, use the number of physical cores divided by 2 as a way to avoid wasting CPU resources and to avoid using both CPU threads, but still allowing to scale for high-end processors with many cores. As an exception, select 2 queues for processors with 2 cores, because otherwise it won't take any advantage of RSS despite being SMP capable. Tested: Processor Intel Xeon E5-2620 (2 sockets, 6 cores/socket, 2 threads/core). NIC Broadcom NetXtreme II BCM57810 (10GBps). Ran some tests with `perf stat iperf3 -R`, with parallelisms of 1, 8 and 24, getting the following results: - Number of queues: 6 (instead of 8) - Network throughput: not affected - CPU usage: utilized 0.05-0.12 CPUs more than before (having 24 CPUs this is only 0.2-0.5% higher) - Reduced the number of context switches by 7-50%, being more noticeable when using a higher number of parallel threads. Suggested-by: Jakub Kicinski Signed-off-by: Íñigo Huguet Link: https://lore.kernel.org/r/20220315091832.13873-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8cbe96ce0a2c..e01a8ce7181f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3664,7 +3664,6 @@ static inline unsigned int get_netdev_rx_queue_index( } #endif -#define DEFAULT_MAX_NUM_RSS_QUEUES (8) int netif_get_num_default_rss_queues(void); enum skb_free_reason { -- cgit v1.2.3 From 0c125f87a84097c182c481be7497af9f816e5db5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 26 Feb 2022 05:07:22 +0100 Subject: clk: fixed-factor: Introduce devm_clk_hw_register_fixed_factor_index() Add an API for a fixed factor clk that uses an index for the parent instead of a string name. This allows us to move drivers away from the string based method of describing parents and use the DT/firmware based method instead. 
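A usage sketch (clock name and surrounding probe code are hypothetical): register a fixed divide-by-two of whatever parent sits at index 0 of the device's DT "clocks" property:

	struct clk_hw *hw;

	hw = devm_clk_hw_register_fixed_factor_index(dev, "core_div2",
						     0,		/* parent index */
						     0,		/* flags */
						     1, 2);	/* mult, div */
	if (IS_ERR(hw))
		return PTR_ERR(hw);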
Signed-off-by: Marek Vasut Cc: Michael Turquette Cc: Rob Herring Cc: Stephen Boyd Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20220226040723.143705-2-marex@denx.de [sboyd@kernel.org: Expose a new API instead of internal function] Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2faa6f7aa8a8..b7a7923f6bbb 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1003,6 +1003,9 @@ void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev, + const char *name, unsigned int index, unsigned long flags, + unsigned int mult, unsigned int div); /** * struct clk_fractional_divider - adjustable fractional divider clock * -- cgit v1.2.3 From d714fb25e755ad96b699993fac47f48c4d6cebe9 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Fri, 18 Mar 2022 13:41:33 -0700 Subject: i2c: add tracepoints for I2C slave events I2C slave events tracepoints can be enabled by: echo 1 > /sys/kernel/tracing/events/i2c_slave/enable and logs in /sys/kernel/tracing/trace will look like: ... i2c_slave: i2c-0 a=010 ret=0 WR_REQ [] ... i2c_slave: i2c-0 a=010 ret=0 WR_RCV [02] ... i2c_slave: i2c-0 a=010 ret=0 WR_RCV [0c] ... i2c_slave: i2c-0 a=010 ret=0 STOP [] ... i2c_slave: i2c-0 a=010 ret=0 RD_REQ [04] ... i2c_slave: i2c-0 a=010 ret=0 RD_PRO [b4] ... i2c_slave: i2c-0 a=010 ret=0 STOP [] formatted as: i2c- a= ret= <- callback return value [] trace printings can be selected by adding a filter like: echo adapter_nr==1 >/sys/kernel/tracing/events/i2c_slave/filter Signed-off-by: Jae Hyun Yoo Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 8 ++--- include/trace/events/i2c_slave.h | 67 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 include/trace/events/i2c_slave.h (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7d4f52ceb7b5..fbda5ada2afc 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -392,12 +392,8 @@ enum i2c_slave_event { int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); int i2c_slave_unregister(struct i2c_client *client); bool i2c_detect_slave_mode(struct device *dev); - -static inline int i2c_slave_event(struct i2c_client *client, - enum i2c_slave_event event, u8 *val) -{ - return client->slave_cb(client, event, val); -} +int i2c_slave_event(struct i2c_client *client, + enum i2c_slave_event event, u8 *val); #else static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif diff --git a/include/trace/events/i2c_slave.h b/include/trace/events/i2c_slave.h new file mode 100644 index 000000000000..811166abbe3a --- /dev/null +++ b/include/trace/events/i2c_slave.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * I2C slave tracepoints + * + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i2c_slave + +#if !defined(_TRACE_I2C_SLAVE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_I2C_SLAVE_H + +#include +#include + +TRACE_DEFINE_ENUM(I2C_SLAVE_READ_REQUESTED); +TRACE_DEFINE_ENUM(I2C_SLAVE_WRITE_REQUESTED); +TRACE_DEFINE_ENUM(I2C_SLAVE_READ_PROCESSED); +TRACE_DEFINE_ENUM(I2C_SLAVE_WRITE_RECEIVED); +TRACE_DEFINE_ENUM(I2C_SLAVE_STOP); + +#define show_event_type(type) \ + __print_symbolic(type, \ + { I2C_SLAVE_READ_REQUESTED, "RD_REQ" }, \ + { I2C_SLAVE_WRITE_REQUESTED, "WR_REQ" }, \ + { I2C_SLAVE_READ_PROCESSED, "RD_PRO" }, \ + { I2C_SLAVE_WRITE_RECEIVED, "WR_RCV" }, \ + { I2C_SLAVE_STOP, " STOP" }) + +TRACE_EVENT(i2c_slave, + TP_PROTO(const struct i2c_client *client, enum i2c_slave_event event, + __u8 *val, int cb_ret), + TP_ARGS(client, event, val, cb_ret), + TP_STRUCT__entry( + __field(int, adapter_nr ) + __field(int, ret ) + __field(__u16, addr ) + __field(__u16, len ) + __field(enum i2c_slave_event, event ) + __array(__u8, buf, 1) ), + + TP_fast_assign( + __entry->adapter_nr = client->adapter->nr; + __entry->addr = client->addr; + __entry->event = event; + __entry->ret = cb_ret; + switch (event) { + case I2C_SLAVE_READ_REQUESTED: + case I2C_SLAVE_READ_PROCESSED: + case I2C_SLAVE_WRITE_RECEIVED: + __entry->len = 1; + memcpy(__entry->buf, val, __entry->len); + break; + default: + __entry->len = 0; + break; + } + ), + TP_printk("i2c-%d a=%03x ret=%d %s [%*phD]", + __entry->adapter_nr, __entry->addr, __entry->ret, + show_event_type(__entry->event), __entry->len, __entry->buf + )); + +#endif /* _TRACE_I2C_SLAVE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 31d0bb9763efad30377505f3467f958d1ebe1e3d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2022 22:02:55 +0100 Subject: netfilter: conntrack: Add and use nf_ct_set_auto_assign_helper_warned() The function sets the pernet boolean to avoid the spurious warning from nf_ct_lookup_helper() when assigning conntrack helpers via nftables. Fixes: 1a64edf54f55 ("netfilter: nft_ct: add helper set support") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 37f0fbefb060..9939c366f720 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum); void nf_nat_helper_put(struct nf_conntrack_helper *helper); +void nf_ct_set_auto_assign_helper_warned(struct net *net); #endif /*_NF_CONNTRACK_HELPER_H*/ -- cgit v1.2.3 From b2d306542ff935a4edf7a88ba8145c108193442a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Mar 2022 18:23:00 +0100 Subject: netfilter: nf_tables: do not reduce read-only expressions Skip register tracking for expressions that perform read-only operations on the registers. Define and use a cookie pointer NFT_REDUCE_READONLY to avoid defining stubs for these expressions. This patch re-enables register tracking which was disabled in ed5f85d42290 ("netfilter: nf_tables: disable register tracking"). Follow up patches add remaining register tracking for existing expressions. 
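For a purely read-only expression the opt-in is just the cookie (the ops and struct names below are hypothetical); expressions that do write to registers implement a real .reduce callback instead:

	static const struct nft_expr_ops nft_foo_ops = {
		.type	= &nft_foo_type,
		.size	= NFT_EXPR_SIZE(sizeof(struct nft_foo)),
		.eval	= nft_foo_eval,
		.reduce	= NFT_REDUCE_READONLY,	/* only reads registers */
	};

The core can then skip such expressions via nft_reduce_is_readonly().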
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c4c0861deac1..edabfb9e97ce 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1633,4 +1633,12 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +#define __NFT_REDUCE_READONLY 1UL +#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY + +static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) +{ + return expr->ops->reduce == NFT_REDUCE_READONLY; +} + #endif /* _NET_NF_TABLES_H */ -- cgit v1.2.3 From 34cc9e52884a16c62acbfb309863fb60e4c24f55 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Mar 2022 18:23:01 +0100 Subject: netfilter: nf_tables: cancel tracking for clobbered destination registers Output of expressions might be larger than one single register, this might clobber existing data. Reset tracking for all destination registers that required to store the expression output. This patch adds three new helper functions: - nft_reg_track_update: cancel previous register tracking and update it. - nft_reg_track_cancel: cancel any previous register tracking info. - __nft_reg_track_cancel: cancel only one single register tracking info. Partial register clobbering detection is also supported by checking the .num_reg field which describes the number of register that are used. This patch updates the following expressions: - meta_bridge - bitwise - byteorder - meta - payload to use these helper functions. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 14 ++++++++++++++ include/net/netfilter/nft_meta.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index edabfb9e97ce..20af9d3557b9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -126,6 +126,7 @@ struct nft_regs_track { struct { const struct nft_expr *selector; const struct nft_expr *bitwise; + u8 num_reg; } regs[NFT_REG32_NUM]; const struct nft_expr *cur; @@ -1641,4 +1642,17 @@ static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) return expr->ops->reduce == NFT_REDUCE_READONLY; } +void nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg, u8 len); +void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); +void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); + +static inline bool nft_reg_track_cmp(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg) +{ + return track->regs[dreg].selector && + track->regs[dreg].selector->ops == expr->ops && + track->regs[dreg].num_reg == 0; +} + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 2dce55c736f4..246fd023dcf4 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -6,6 +6,7 @@ struct nft_meta { enum nft_meta_keys key:8; + u8 len; union { u8 dreg; u8 sreg; -- cgit v1.2.3 From aaa7b20bd4d637fd4ef0d72b6c828c061b9bc5f7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Mar 2022 18:23:04 +0100 Subject: netfilter: nft_meta: extend reduce support to bridge family its enough to export the meta get reduce helper and then call it from nft_meta_bridge too. 
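A sketch of the reuse (whether the bridge family calls the helper directly or through a thin wrapper for its bridge-only keys is an implementation detail not shown here):

	static bool nft_meta_bridge_get_reduce(struct nft_regs_track *track,
					       const struct nft_expr *expr)
	{
		/* bridge-specific keys could get their own tracking first */
		return nft_meta_get_reduce(track, expr);
	}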
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_meta.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 246fd023dcf4..9b51cc67de54 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -44,4 +44,6 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); +bool nft_meta_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif -- cgit v1.2.3 From 3c1eb413a45b6c6327fed394705081ec6202b31a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Mar 2022 18:23:12 +0100 Subject: netfilter: nft_fib: add reduce support The fib expression stores to a register, so we can't add empty stub. Check that the register that is being written is in fact redundant. In most cases, this is expected to cancel tracking as re-use is unlikely. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 237f3757637e..eed099eae672 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -37,4 +37,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev); + +bool nft_fib_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif -- cgit v1.2.3 From b00fa38a9c1cba044a32a601b49a55a18ed719d1 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 17 Mar 2022 21:55:52 -0700 Subject: bpf: Enable non-atomic allocations in local storage Currently, local storage memory can only be allocated atomically (GFP_ATOMIC). This restriction is too strict for sleepable bpf programs. In this patch, the verifier detects whether the program is sleepable, and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a 5th argument to bpf_task/sk/inode_storage_get. This flag will propagate down to the local storage functions that allocate memory. Please note that bpf_task/sk/inode_storage_update_elem functions are invoked by userspace applications through syscalls. Preemption is disabled before bpf_task/sk/inode_storage_update_elem is called, which means they will always have to allocate memory atomically. 
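A simplified sketch of the resulting call pattern (the locals, including prog_is_sleepable, are illustrative): the chosen allocation mode is simply forwarded as the new fifth argument,

	gfp_t gfp_flags = prog_is_sleepable ? GFP_KERNEL : GFP_ATOMIC;

	sdata = bpf_local_storage_update(owner, smap, value,
					 map_flags, gfp_flags);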
Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com --- include/linux/bpf_local_storage.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 37b3906af8b1..493e63258497 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem); + bool charge_mem, gfp_t gfp_flags); int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *first_selem); + struct bpf_local_storage_elem *first_selem, + gfp_t gfp_flags); struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags); + void *value, u64 map_flags, gfp_t gfp_flags); void bpf_local_storage_free_rcu(struct rcu_head *rcu); -- cgit v1.2.3 From ee2a098851bfbe8bcdd964c0121f4246f00ff41e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Mar 2022 11:20:41 -0700 Subject: bpf: Adjust BPF stack helper functions to accommodate skip > 0 Let's say that the caller has storage for num_elem stack frames. Then, the BPF stack helper functions walk the stack for only num_elem frames. This means that if skip > 0, one keeps only 'num_elem - skip' frames. This is because it sets init_nr in the perf_callchain_entry to the end of the buffer to save num_elem entries only. I believe it was because the perf callchain code unwound the stack frames until it reached the global max size (sysctl_perf_event_max_stack). However it now has perf_callchain_entry_ctx.max_stack to limit the iteration locally. This simplifies the code to handle init_nr in the BPF callstack entries and removes the confusion with the perf_event's __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0. Also change the comment on bpf_get_stack() in the header file to be more explicit what the return value means. Fixes: c195651e565a ("bpf: add bpf_get_stack helper") Signed-off-by: Namhyung Kim Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com Link: https://lore.kernel.org/bpf/20220314182042.71025-1-namhyung@kernel.org Based-on-patch-by: Eugene Loh --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7604e7d5438f..d14b10b85e51 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3009,8 +3009,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4316,8 +4316,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. 
+ * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description -- cgit v1.2.3 From 3387ce4d8a5f2956fab827edf499fe6780e83faa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Mar 2022 11:05:50 +0100 Subject: headers/prep: Fix <linux/psi.h> header to build standalone: Add the dependency to <linux/cgroup-defs.h>, because cgroup_move_task() will dereference 'struct css_set'. ( Only older toolchains are affected, due to variations in the implementation of rcu_assign_pointer() et al. ) Cc: Peter Zijlstra Cc: Linus Torvalds Reported-by: Sachin Sant Reported-by: Andrew Morton Reported-by: Borislav Petkov Signed-off-by: Ingo Molnar --- include/linux/psi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/psi.h b/include/linux/psi.h index 7f7d1d88c3bb..89784763d19e 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -6,6 +6,7 @@ #include #include #include +#include <linux/cgroup-defs.h> struct seq_file; struct css_set; -- cgit v1.2.3 From 6d8491910fcd3324d0f0ece3bd68e85ead3a04d7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 1 Mar 2022 06:03:47 +0000 Subject: KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 KVM_CAP_DISABLE_QUIRKS is irrevocably broken. The capability does not advertise the set of quirks which may be disabled to userspace, so it is impossible to predict the behavior of KVM. Worse yet, KVM_CAP_DISABLE_QUIRKS will tolerate any value for cap->args[0], meaning it fails to reject attempts to set invalid quirk bits. The only valid workaround for the quirky quirks API is to add a new CAP. Actually advertise the set of quirks that can be disabled to userspace so it can predict KVM's behavior. Reject values for cap->args[0] that contain invalid bits. Finally, add documentation for the new capability and describe the existing quirks. Signed-off-by: Oliver Upton Message-Id: <20220301060351.442881-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d2f1efc3aa35..91a6fe4e02c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1143,6 +1143,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 8879b32a3a809a183e6e2998725a0f33c4e42d41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:41 -0700 Subject: devlink: add explicitly locked flavor of the rate node APIs We'll need an explicitly locked rate node API for netdevsim to switch eswitch mode setting to locked. Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski Signed-off-by: David S.
Miller --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index fd89a17adea1..a30180c0988a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1490,6 +1490,10 @@ int devl_port_register(struct devlink *devlink, unsigned int port_index); void devl_port_unregister(struct devlink_port *devlink_port); +int devl_rate_leaf_create(struct devlink_port *port, void *priv); +void devl_rate_leaf_destroy(struct devlink_port *devlink_port); +void devl_rate_nodes_destroy(struct devlink *devlink); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); -- cgit v1.2.3 From a43bf604446414103b7535f38e739b65601c4fb2 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 16 Mar 2022 18:24:26 -0400 Subject: NFSv4.1 provide mount option to toggle trunking discovery Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to toggle whether or not the client will engage in actively discovery of trunking locations. v2 make notrunkdiscovery default Signed-off-by: Olga Kornievskaia Fixes: 1976b2b31462 ("NFSv4.1 query for fs_location attr on a new file system") Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ca0959e51e81..b0e3fd550122 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -151,6 +151,7 @@ struct nfs_server { #define NFS_MOUNT_SOFTREVAL 0x800000 #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 +#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 028152260c5782504995e1fe3910ad39d4e4f34a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 21 Mar 2022 11:35:29 -0500 Subject: Revert "of: base: Introduce of_alias_get_alias_list() to check alias IDs" This reverts commit b1078c355d76769b5ddefc67d143fbd9b6e52c05. The single user of of_alias_get_alias_list(), drivers/tty/serial/xilinx_uartps.c, has since been refactored and no longer needs this function. 
It also contained a Smatch checker warning: drivers/of/base.c:2038 of_alias_get_alias_list() warn: passing negative bit value 's32min-(-2),0-s32max' to 'set_bit()' Reported-by: Dan Carpenter Signed-off-by: Rob Herring --- include/linux/of.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 2dc77430a91a..04971e85fbc9 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -388,9 +388,6 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); -extern int of_alias_get_alias_list(const struct of_device_id *matches, - const char *stem, unsigned long *bitmap, - unsigned int nbits); extern int of_machine_is_compatible(const char *compat); @@ -766,13 +763,6 @@ static inline int of_alias_get_highest_id(const char *stem) return -ENOSYS; } -static inline int of_alias_get_alias_list(const struct of_device_id *matches, - const char *stem, unsigned long *bitmap, - unsigned int nbits) -{ - return -ENOSYS; -} - static inline int of_machine_is_compatible(const char *compat) { return 0; -- cgit v1.2.3 From 4c65422901154766e5cee17875ed680366a4a141 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 13:45:25 -0500 Subject: mm/gup: Remove an assumption of a contiguous memmap This assumption needs the inverse of nth_page(), which is temporarily named page_nth() until it's renamed later in this series. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0201d258c646..e3f8755f65ed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -212,8 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) +#define page_nth(head, tail) (page_to_pfn(tail) - page_to_pfn(head)) #else #define nth_page(page,n) ((page) + (n)) +#define page_nth(head, tail) ((tail) - (head)) #endif /* to align the pointer to the (next) page boundary */ -- cgit v1.2.3 From 5232c63f46fdd779303527ec36c518cc1e9c6b4e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 Jan 2022 16:46:43 -0500 Subject: mm: Make compound_pincount always available Move compound_pincount from the third page to the second page, which means it's available for all compound pages. That lets us delete hpage_pincount_available(). On 32-bit systems, there isn't enough space for both compound_pincount and compound_nr in the second page (it would collide with page->private, which is in use for pages in the swap cache), so revert the optimisation of storing both compound_order and compound_nr on 32-bit systems. 
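A minimal sketch of what this enables (the locals are illustrative): any compound page, including an order-1 page, can now report its pin count,

	if (PageCompound(page))
		pinned = compound_pincount(page) > 0;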
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 21 ++++++++------------- include/linux/mm_types.h | 7 +++++-- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index e3f8755f65ed..c64bd0b67d75 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -887,17 +887,6 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline bool hpage_pincount_available(struct page *page) -{ - /* - * Can the page->hpage_pinned_refcount field be used? That field is in - * the 3rd page of the compound page, so the smallest (2-page) compound - * pages cannot support it. - */ - page = compound_head(page); - return PageCompound(page) && compound_order(page) > 1; -} - static inline int head_compound_pincount(struct page *head) { return atomic_read(compound_pincount_ptr(head)); @@ -905,7 +894,7 @@ static inline int head_compound_pincount(struct page *head) static inline int compound_pincount(struct page *page) { - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); + VM_BUG_ON_PAGE(!PageCompound(page), page); page = compound_head(page); return head_compound_pincount(page); } @@ -913,7 +902,9 @@ static inline int compound_pincount(struct page *page) static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; +#ifdef CONFIG_64BIT page[1].compound_nr = 1U << order; +#endif } /* Returns the number of pages in this potentially compound page. */ @@ -921,7 +912,11 @@ static inline unsigned long compound_nr(struct page *page) { if (!PageHead(page)) return 1; +#ifdef CONFIG_64BIT return page[1].compound_nr; +#else + return 1UL << compound_order(page); +#endif } /* Returns the number of bytes in this potentially compound page. */ @@ -1269,7 +1264,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); */ static inline bool page_maybe_dma_pinned(struct page *page) { - if (hpage_pincount_available(page)) + if (PageCompound(page)) return compound_pincount(page) > 0; /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 475bdb282769..0e274c9b934e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -135,11 +135,14 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + atomic_t compound_pincount; +#ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ +#endif }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ - atomic_t hpage_pinned_refcount; + unsigned long _compound_pad_2; /* For both global and memcg */ struct list_head deferred_list; }; @@ -300,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) static inline atomic_t *compound_pincount_ptr(struct page *page) { - return &page[2].hpage_pinned_refcount; + return &page[1].compound_pincount; } /* -- cgit v1.2.3 From 3d11b225aeb184bc3dc9b4b27b302815a7c531aa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 27 Dec 2021 18:28:58 -0500 Subject: mm: Add folio_pincount_ptr() This is the folio equivalent of compound_pincount_ptr(). 
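A short sketch of the intended use, mirroring how the pin count is read elsewhere in this series:

	if (folio_test_large(folio))
		pincount = atomic_read(folio_pincount_ptr(folio));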
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c64bd0b67d75..c45739dfdd04 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1544,6 +1544,11 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline atomic_t *folio_pincount_ptr(struct folio *folio) +{ + return &folio_page(folio, 1)->compound_pincount; +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) -- cgit v1.2.3 From 0b90ddae13441c43a30d2e2689b8193a81891c92 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 27 Dec 2021 18:40:41 -0500 Subject: mm: Turn page_maybe_dma_pinned() into folio_maybe_dma_pinned() Replace three calls to compound_head() with one. This removes the last user of compound_pincount(), so remove that helper too. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 129 ++++++++++++++++++++++++++--------------------------- 1 file changed, 63 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c45739dfdd04..35e453ac5c0f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -892,13 +892,6 @@ static inline int head_compound_pincount(struct page *head) return atomic_read(compound_pincount_ptr(head)); } -static inline int compound_pincount(struct page *page) -{ - VM_BUG_ON_PAGE(!PageCompound(page), page); - page = compound_head(page); - return head_compound_pincount(page); -} - static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1236,70 +1229,11 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); -/** - * page_maybe_dma_pinned - Report if a page is pinned for DMA. - * @page: The page. - * - * This function checks if a page has been pinned via a call to - * a function in the pin_user_pages() family. - * - * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, - * because it means "definitely not pinned for DMA", but true means "probably - * pinned for DMA, but possibly a false positive due to having at least - * GUP_PIN_COUNTING_BIAS worth of normal page references". - * - * False positives are OK, because: a) it's unlikely for a page to get that many - * refcounts, and b) all the callers of this routine are expected to be able to - * deal gracefully with a false positive. - * - * For huge pages, the result will be exactly correct. That's because we have - * more tracking data available: the 3rd struct page in the compound page is - * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS - * scheme). - * - * For more information, please see Documentation/core-api/pin_user_pages.rst. - * - * Return: True, if it is likely that the page has been "dma-pinned". - * False, if the page is definitely not dma-pinned. - */ -static inline bool page_maybe_dma_pinned(struct page *page) -{ - if (PageCompound(page)) - return compound_pincount(page) > 0; - - /* - * page_ref_count() is signed. 
If that refcount overflows, then - * page_ref_count() returns a negative value, and callers will avoid - * further incrementing the refcount. - * - * Here, for that overflow case, use the signed bit to count a little - * bit higher via unsigned math, and thus still get an accurate result. - */ - return ((unsigned int)page_ref_count(compound_head(page))) >= - GUP_PIN_COUNTING_BIAS; -} - static inline bool is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } -/* - * This should most likely only be called during fork() to see whether we - * should break the cow immediately for a page on the src mm. - */ -static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, - struct page *page) -{ - if (!is_cow_mapping(vma->vm_flags)) - return false; - - if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) - return false; - - return page_maybe_dma_pinned(page); -} - #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif @@ -1549,6 +1483,69 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) return &folio_page(folio, 1)->compound_pincount; } +/** + * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. + * @folio: The folio. + * + * This function checks if a folio has been pinned via a call to + * a function in the pin_user_pages() family. + * + * For small folios, the return value is partially fuzzy: false is not fuzzy, + * because it means "definitely not pinned for DMA", but true means "probably + * pinned for DMA, but possibly a false positive due to having at least + * GUP_PIN_COUNTING_BIAS worth of normal folio references". + * + * False positives are OK, because: a) it's unlikely for a folio to + * get that many refcounts, and b) all the callers of this routine are + * expected to be able to deal gracefully with a false positive. + * + * For large folios, the result will be exactly correct. That's because + * we have more tracking data available: the compound_pincount is used + * instead of the GUP_PIN_COUNTING_BIAS scheme. + * + * For more information, please see Documentation/core-api/pin_user_pages.rst. + * + * Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. + */ +static inline bool folio_maybe_dma_pinned(struct folio *folio) +{ + if (folio_test_large(folio)) + return atomic_read(folio_pincount_ptr(folio)) > 0; + + /* + * folio_ref_count() is signed. If that refcount overflows, then + * folio_ref_count() returns a negative value, and callers will avoid + * further incrementing the refcount. + * + * Here, for that overflow case, use the sign bit to count a little + * bit higher via unsigned math, and thus still get an accurate result. + */ + return ((unsigned int)folio_ref_count(folio)) >= + GUP_PIN_COUNTING_BIAS; +} + +static inline bool page_maybe_dma_pinned(struct page *page) +{ + return folio_maybe_dma_pinned(page_folio(page)); +} + +/* + * This should most likely only be called during fork() to see whether we + * should break the cow immediately for a page on the src mm. 
+ */ +static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, + struct page *page) +{ + if (!is_cow_mapping(vma->vm_flags)) + return false; + + if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) + return false; + + return page_maybe_dma_pinned(page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) -- cgit v1.2.3 From 40fcc7fc2c3838f3afe07a3a72709b45566e6cdb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 29 Dec 2021 12:23:55 -0500 Subject: mm: Remove page_cache_add_speculative() and page_cache_get_speculative() These wrappers have no more callers, so delete them. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 7 +++---- include/linux/pagemap.h | 10 ---------- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35e453ac5c0f..b764057022c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1215,10 +1215,9 @@ static inline void put_page(struct page *page) * applications that don't have huge page reference counts, this won't be an * issue. * - * Locking: the lockless algorithm described in page_cache_get_speculative() - * and page_cache_gup_pin_speculative() provides safe operation for - * get_user_pages and page_mkclean and other calls that race to set up page - * table entries. + * Locking: the lockless algorithm described in folio_try_get_rcu() + * provides safe operation for get_user_pages(), page_mkclean() and + * other calls that race to set up page table entries. */ #define GUP_PIN_COUNTING_BIAS (1U << 10) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 270bf5136c34..cdb3f118603a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -283,16 +283,6 @@ static inline struct inode *folio_inode(struct folio *folio) return folio->mapping->host; } -static inline bool page_cache_add_speculative(struct page *page, int count) -{ - return folio_ref_try_add_rcu((struct folio *)page, count); -} - -static inline bool page_cache_get_speculative(struct page *page) -{ - return page_cache_add_speculative(page, 1); -} - /** * folio_attach_private - Attach private data to a folio. * @folio: Folio to attach data to. -- cgit v1.2.3 From 822951d84684d7a0c4f45e7231c960e7fe786d8f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 Jan 2022 00:15:04 -0500 Subject: mm/hugetlb: Use try_grab_folio() instead of try_grab_compound_head() follow_hugetlb_page() only cares about success or failure, so it doesn't need to know the type of the returned pointer, only whether it's NULL or not. 
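A sketch of the call-site pattern (argument and local names are illustrative, following the try_grab_folio() helper used elsewhere in this series):

	/* only NULL-ness of the returned folio matters here */
	if (!try_grab_folio(page, refs, flags)) {
		err = -ENOMEM;
		goto out;
	}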
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b764057022c8..dca5c99395c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1124,9 +1124,6 @@ static inline void get_page(struct page *page) } bool __must_check try_grab_page(struct page *page, unsigned int flags); -struct page *try_grab_compound_head(struct page *page, int refs, - unsigned int flags); - static inline __must_check bool try_get_page(struct page *page) { -- cgit v1.2.3 From 659508f9c936aa6e3aaf6e9cf6a4a8836b8f8355 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 23 Dec 2021 10:20:12 -0500 Subject: mm/gup: Turn compound_range_next() into gup_folio_range_next() Convert the only caller to work on folios instead of pages. This removes the last caller of put_compound_head(), so delete it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index dca5c99395c9..0d3f9057a807 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -212,10 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -#define page_nth(head, tail) (page_to_pfn(tail) - page_to_pfn(head)) +#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) -#define page_nth(head, tail) ((tail) - (head)) +#define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ -- cgit v1.2.3 From 536939ff516382b391a0039262e27fc80c7b3924 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Mar 2022 12:57:38 -0400 Subject: mm: Add three folio wrappers folio_is_zone_device() is equivalent to is_zone_device_page(), folio_is_device_private() is equivalent to is_device_private_page(), and folio_is_pinnable() is equivalent to is_pinnable_page(). All of these tests return the same result for every page in the folio, so we can just pass the head page of the folio to the page variant of the function. 
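A minimal sketch (the surrounding logic is illustrative): a caller holding a folio can test it directly instead of passing &folio->page,

	if (folio_is_zone_device(folio))
		return false;
	if (!folio_is_pinnable(folio))
		return false;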
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/memremap.h | 5 +++++ include/linux/mm.h | 10 ++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index d6a114dd5ea8..8af304f6b504 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -138,6 +138,11 @@ static inline bool is_device_private_page(const struct page *page) page->pgmap->type == MEMORY_DEVICE_PRIVATE; } +static inline bool folio_is_device_private(const struct folio *folio) +{ + return is_device_private_page(&folio->page); +} + static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d3f9057a807..2ca10c167f35 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1075,6 +1075,11 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +static inline bool folio_is_zone_device(const struct folio *folio) +{ + return is_zone_device_page(&folio->page); +} + static inline bool is_zone_movable_page(const struct page *page) { return page_zonenum(page) == ZONE_MOVABLE; @@ -1556,6 +1561,11 @@ static inline bool is_pinnable_page(struct page *page) } #endif +static inline bool folio_is_pinnable(struct folio *folio) +{ + return is_pinnable_page(&folio->page); +} + static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); -- cgit v1.2.3 From 8927f6473e56e32e328ae8ed43736412f7f76a4e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 23 Dec 2021 16:39:05 -0500 Subject: mm/workingset: Convert workingset_eviction() to take a folio This removes an assumption that THPs are the only kind of compound pages and removes a few hidden calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d..de36f140227e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -328,7 +328,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); -void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); +void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); -- cgit v1.2.3 From 3ecb0087ecee6213544a1e0b838826a0f4831ce5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 27 Dec 2021 21:11:34 -0500 Subject: mm/memcg: Convert mem_cgroup_swapout() to take a folio This removes an assumption that THPs are the only kind of compound pages and removes a couple of hidden calls to compound_head. It also documents that you can't pass a tail page to mem_cgroup_swapout(). 
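A simplified sketch of the reclaim-side caller after the conversion:

	if (folio_test_swapcache(folio)) {
		swp_entry_t swap = folio_swap_entry(folio);

		mem_cgroup_swapout(folio, swap);
	}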
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index de36f140227e..e7cb7a1e6ceb 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -741,7 +741,7 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) #endif #ifdef CONFIG_MEMCG_SWAP -extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { @@ -761,7 +761,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else -static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { } -- cgit v1.2.3 From 06d20bdb986815a75fb1addf34655756ba922e3a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Jan 2022 14:40:12 -0500 Subject: mm: Add lru_to_folio() Since page->lru occupies the same bytes as compound_head, any page on the LRU list must be a folio. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ca10c167f35..a583b7375445 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -225,6 +225,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +static inline struct folio *lru_to_folio(struct list_head *head) +{ + return list_entry((head)->prev, struct folio, lru); +} void setup_initial_init_mm(void *start_code, void *end_code, void *end_data, void *brk); -- cgit v1.2.3 From 5100da38ef3c33d9ad8b60b29c2b671249bf7e1d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 12 Feb 2022 22:48:55 -0500 Subject: mm: Convert remove_mapping() to take a folio Add kernel-doc and return the number of pages removed in order to get the statistics right in __invalidate_mapping_pages(). 
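A sketch of how a caller can now account the result (the locals are illustrative):

	long removed = remove_mapping(mapping, folio);

	if (removed)
		nr_removed += removed;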
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Miaohe Lin --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index e7cb7a1e6ceb..304f174b4d31 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -395,7 +395,7 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -extern int remove_mapping(struct address_space *mapping, struct page *page); +long remove_mapping(struct address_space *mapping, struct folio *folio); extern unsigned long reclaim_pages(struct list_head *page_list); #ifdef CONFIG_NUMA -- cgit v1.2.3 From d6c75dc22c755c567838f12f12a16f2a323ebd4e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 13 Feb 2022 15:22:28 -0500 Subject: mm/truncate: Split invalidate_inode_page() into mapping_evict_folio() Some of the callers already have the address_space and can avoid calling folio_mapping() and checking if the folio was already truncated. Also add kernel-doc and fix the return type (in case we ever support folios larger than 4TB). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Miaohe Lin --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a583b7375445..dede2eda4d7f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1825,7 +1825,6 @@ extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int generic_error_remove_page(struct address_space *mapping, struct page *page); -int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, -- cgit v1.2.3 From 261b6840ed10419ac2f554e515592d59dd5c82cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 13 Feb 2022 16:40:24 -0500 Subject: mm: Turn deactivate_file_page() into deactivate_file_folio() This function has one caller which already has a reference to the page, so we don't need to use get_page_unless_zero(). Also move the prototype to mm/internal.h. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Miaohe Lin --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 304f174b4d31..064e60e9f63f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -372,7 +372,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -- cgit v1.2.3 From c56109dd35c9204cd6c49d2116ef36e5044ef867 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 13 Feb 2022 17:22:10 -0500 Subject: mm/truncate: Combine invalidate_mapping_pagevec() and __invalidate_mapping_pages() We can save a function call by combining these two functions, which are identical except for the return value. Also move the prototype to mm/internal.h. 
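A sketch of the assumed shape of the combined helper (illustrative; the surviving prototype lives in mm/internal.h): invalidate_mapping_pages() becomes a thin wrapper that passes NULL for the pagevec counter,

	unsigned long invalidate_mapping_pages(struct address_space *mapping,
					       pgoff_t start, pgoff_t end)
	{
		return invalidate_mapping_pagevec(mapping, start, end, NULL);
	}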
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Miaohe Lin --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2d892b201b0..85c584c5c623 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2749,10 +2749,6 @@ extern bool is_bad_inode(struct inode *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -void invalidate_mapping_pagevec(struct address_space *mapping, - pgoff_t start, pgoff_t end, - unsigned long *nr_pagevec); - static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -- cgit v1.2.3 From cbcc268bb1ce5b539e7652d398e08e9b83dc4cef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 13 Feb 2022 17:23:58 -0500 Subject: fs: Move many prototypes to pagemap.h These functions are page cache functionality and don't need to be declared in fs.h. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Miaohe Lin --- include/linux/fs.h | 116 ------------------------------------------------ include/linux/pagemap.h | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 116 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 85c584c5c623..0961c979e949 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2746,50 +2746,6 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -unsigned long invalidate_mapping_pages(struct address_space *mapping, - pgoff_t start, pgoff_t end); - -static inline void invalidate_remote_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) - invalidate_mapping_pages(inode->i_mapping, 0, -1); -} -extern int invalidate_inode_pages2(struct address_space *mapping); -extern int invalidate_inode_pages2_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -extern int write_inode_now(struct inode *, int); -extern int filemap_fdatawrite(struct address_space *); -extern int filemap_flush(struct address_space *); -extern int filemap_fdatawait_keep_errors(struct address_space *mapping); -extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, - loff_t lend); -extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping, - loff_t start_byte, loff_t end_byte); - -static inline int filemap_fdatawait(struct address_space *mapping) -{ - return filemap_fdatawait_range(mapping, 0, LLONG_MAX); -} - -extern bool filemap_range_has_page(struct address_space *, loff_t lstart, - loff_t lend); -extern int filemap_write_and_wait_range(struct address_space *mapping, - loff_t lstart, loff_t lend); -extern int __filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end, int sync_mode); -extern int filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end); -extern int filemap_check_errors(struct address_space *mapping); -extern void __filemap_set_wb_err(struct address_space *mapping, int err); -int filemap_fdatawrite_wbc(struct address_space *mapping, - struct writeback_control *wbc); - -static inline int filemap_write_and_wait(struct address_space *mapping) -{ - return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); -} - extern int __must_check 
file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); @@ -2801,67 +2757,6 @@ static inline int file_write_and_wait(struct file *file) return file_write_and_wait_range(file, 0, LLONG_MAX); } -/** - * filemap_set_wb_err - set a writeback error on an address_space - * @mapping: mapping in which to set writeback error - * @err: error to be set in mapping - * - * When writeback fails in some way, we must record that error so that - * userspace can be informed when fsync and the like are called. We endeavor - * to report errors on any file that was open at the time of the error. Some - * internal callers also need to know when writeback errors have occurred. - * - * When a writeback error occurs, most filesystems will want to call - * filemap_set_wb_err to record the error in the mapping so that it will be - * automatically reported whenever fsync is called on the file. - */ -static inline void filemap_set_wb_err(struct address_space *mapping, int err) -{ - /* Fastpath for common case of no error */ - if (unlikely(err)) - __filemap_set_wb_err(mapping, err); -} - -/** - * filemap_check_wb_err - has an error occurred since the mark was sampled? - * @mapping: mapping to check for writeback errors - * @since: previously-sampled errseq_t - * - * Grab the errseq_t value from the mapping, and see if it has changed "since" - * the given value was sampled. - * - * If it has then report the latest error set, otherwise return 0. - */ -static inline int filemap_check_wb_err(struct address_space *mapping, - errseq_t since) -{ - return errseq_check(&mapping->wb_err, since); -} - -/** - * filemap_sample_wb_err - sample the current errseq_t to test for later errors - * @mapping: mapping to be sampled - * - * Writeback errors are always reported relative to a particular sample point - * in the past. This function provides those sample points. - */ -static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) -{ - return errseq_sample(&mapping->wb_err); -} - -/** - * file_sample_sb_err - sample the current errseq_t to test for later errors - * @file: file pointer to be sampled - * - * Grab the most current superblock-level errseq_t value for the given - * struct file. - */ -static inline errseq_t file_sample_sb_err(struct file *file) -{ - return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); -} - extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); @@ -3604,15 +3499,4 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); -/* - * Flush file data before changing attributes. Caller must hold any locks - * required to prevent further writes to this file until we're done setting - * flags. 
- */ -static inline int inode_drain_writes(struct inode *inode) -{ - inode_dio_wait(inode); - return filemap_write_and_wait(inode->i_mapping); -} - #endif /* _LINUX_FS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cdb3f118603a..f968b36ad771 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -18,6 +18,120 @@ struct folio_batch; +unsigned long invalidate_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end); + +static inline void invalidate_remote_inode(struct inode *inode) +{ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) + invalidate_mapping_pages(inode->i_mapping, 0, -1); +} +int invalidate_inode_pages2(struct address_space *mapping); +int invalidate_inode_pages2_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); +int write_inode_now(struct inode *, int sync); +int filemap_fdatawrite(struct address_space *); +int filemap_flush(struct address_space *); +int filemap_fdatawait_keep_errors(struct address_space *mapping); +int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +int filemap_fdatawait_range_keep_errors(struct address_space *mapping, + loff_t start_byte, loff_t end_byte); + +static inline int filemap_fdatawait(struct address_space *mapping) +{ + return filemap_fdatawait_range(mapping, 0, LLONG_MAX); +} + +bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); +int filemap_write_and_wait_range(struct address_space *mapping, + loff_t lstart, loff_t lend); +int __filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end, int sync_mode); +int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); +int filemap_check_errors(struct address_space *mapping); +void __filemap_set_wb_err(struct address_space *mapping, int err); +int filemap_fdatawrite_wbc(struct address_space *mapping, + struct writeback_control *wbc); + +static inline int filemap_write_and_wait(struct address_space *mapping) +{ + return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); +} + +/** + * filemap_set_wb_err - set a writeback error on an address_space + * @mapping: mapping in which to set writeback error + * @err: error to be set in mapping + * + * When writeback fails in some way, we must record that error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred. + * + * When a writeback error occurs, most filesystems will want to call + * filemap_set_wb_err to record the error in the mapping so that it will be + * automatically reported whenever fsync is called on the file. + */ +static inline void filemap_set_wb_err(struct address_space *mapping, int err) +{ + /* Fastpath for common case of no error */ + if (unlikely(err)) + __filemap_set_wb_err(mapping, err); +} + +/** + * filemap_check_wb_err - has an error occurred since the mark was sampled? + * @mapping: mapping to check for writeback errors + * @since: previously-sampled errseq_t + * + * Grab the errseq_t value from the mapping, and see if it has changed "since" + * the given value was sampled. + * + * If it has then report the latest error set, otherwise return 0. 
+ */ +static inline int filemap_check_wb_err(struct address_space *mapping, + errseq_t since) +{ + return errseq_check(&mapping->wb_err, since); +} + +/** + * filemap_sample_wb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Writeback errors are always reported relative to a particular sample point + * in the past. This function provides those sample points. + */ +static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) +{ + return errseq_sample(&mapping->wb_err); +} + +/** + * file_sample_sb_err - sample the current errseq_t to test for later errors + * @file: file pointer to be sampled + * + * Grab the most current superblock-level errseq_t value for the given + * struct file. + */ +static inline errseq_t file_sample_sb_err(struct file *file) +{ + return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); +} + +/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + static inline bool mapping_empty(struct address_space *mapping) { return xa_empty(&mapping->i_pages); -- cgit v1.2.3 From 74e8ee4708a8edabbbc7ab8c12ec24d7a561bb41 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 18 Jan 2022 10:50:48 -0500 Subject: mm: Turn head_compound_mapcount() into folio_entire_mapcount() Adjust documentation to be more clear. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index dede2eda4d7f..70f0ca217962 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -776,21 +776,26 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -static inline int head_compound_mapcount(struct page *head) +/* + * How many times the entire folio is mapped as a single unit (eg by a + * PMD or PUD entry). This is probably not what you want, except for + * debugging purposes; look at folio_mapcount() or page_mapcount() + * instead. + */ +static inline int folio_entire_mapcount(struct folio *folio) { - return atomic_read(compound_mapcount_ptr(head)) + 1; + VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + return atomic_read(folio_mapcount_ptr(folio)) + 1; } /* * Mapcount of compound page as a whole, does not include mapped sub-pages. * - * Must be called only for compound pages or any their tail sub-pages. + * Must be called only for compound pages. */ static inline int compound_mapcount(struct page *page) { - VM_BUG_ON_PAGE(!PageCompound(page), page); - page = compound_head(page); - return head_compound_mapcount(page); + return folio_entire_mapcount(page_folio(page)); } /* -- cgit v1.2.3 From 4ba1119cd53166d853050ff1a9d76079cd8f8e06 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Jan 2022 16:33:26 -0500 Subject: mm: Add folio_mapcount() This implements the same algorithm as total_mapcount(), which is transformed into a wrapper function. 
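A minimal usage sketch (the locals are illustrative):

	/* whole-folio view: how many times is any part of it mapped? */
	if (folio_mapcount(folio) > 1)
		shared = true;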
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 70f0ca217962..0d380dc26847 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -825,8 +825,14 @@ static inline int page_mapcount(struct page *page) return atomic_read(&page->_mapcount) + 1; } +int folio_mapcount(struct folio *folio); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE -int total_mapcount(struct page *page); +static inline int total_mapcount(struct page *page) +{ + return folio_mapcount(page_folio(page)); +} + int page_trans_huge_mapcount(struct page *page); #else static inline int total_mapcount(struct page *page) -- cgit v1.2.3 From 346cf61311f6e348337e95aa2febe29e86137f42 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 18 Jan 2022 08:56:47 -0500 Subject: mm: Add split_folio_to_list() This is a convenience function; split_huge_page_to_list() can take any page in a folio (and does so on purpose because that page will be the one which keeps the refcount). But it's convenient for the callers to pass the folio instead of the first page in the folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/huge_mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e4c18ba8d3bf..71c073d411ac 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -483,6 +483,12 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int split_folio_to_list(struct folio *folio, + struct list_head *list) +{ + return split_huge_page_to_list(&folio->page, list); +} + /** * thp_size - Size of a transparent huge page. * @page: Head page of a transparent huge page. -- cgit v1.2.3 From f087b903fc2e4975bff9742a66ee7a837a2f545b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Feb 2022 23:29:45 -0500 Subject: mm: Add folio_pgoff() This is the folio equivalent of page_to_pgoff(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f968b36ad771..a73c928e1d74 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -817,6 +817,17 @@ static inline loff_t folio_file_pos(struct folio *folio) return page_file_offset(&folio->page); } +/* + * Get the offset in PAGE_SIZE (even for hugetlb folios). + * (TODO: hugetlb folios should have ->index in PAGE_SIZE) + */ +static inline pgoff_t folio_pgoff(struct folio *folio) +{ + if (unlikely(folio_test_hugetlb(folio))) + return hugetlb_basepage_index(&folio->page); + return folio->index; +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.3 From eed05e54d275b3cfc5d8c79843c5276a5878e94a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 3 Feb 2022 09:06:08 -0500 Subject: mm: Add DEFINE_PAGE_VMA_WALK and DEFINE_FOLIO_VMA_WALK Instead of declaring a struct page_vma_mapped_walk directly, use these helpers to allow us to transition to a PFN approach in the following patches. 
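A sketch of the intended use (the flags value is illustrative):

	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		/* pvmw.pte / pvmw.pmd point at the mapping entries */
	}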
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index ac29b076082b..0d894a2bfaa1 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -214,6 +214,22 @@ struct page_vma_mapped_walk { unsigned int flags; }; +#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ + struct page_vma_mapped_walk name = { \ + .page = _page, \ + .vma = _vma, \ + .address = _address, \ + .flags = _flags, \ + } + +#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ + struct page_vma_mapped_walk name = { \ + .page = &_folio->page, \ + .vma = _vma, \ + .address = _address, \ + .flags = _flags, \ + } + static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) { /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */ -- cgit v1.2.3 From 2aff7a4755bed2870ee23b75bc88cdc8d76cdd03 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 3 Feb 2022 11:40:17 -0500 Subject: mm: Convert page_vma_mapped_walk to work on PFNs page_mapped_in_vma() really just wants to walk one page, but as the code stands, if passed the head page of a compound page, it will walk every page in the compound page. Extract pfn/nr_pages/pgoff from the struct page early, so they can be overridden by page_mapped_in_vma(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/hugetlb.h | 5 +++++ include/linux/rmap.h | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d1897a69c540..6ba2f8e74fbb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -970,6 +970,11 @@ static inline struct hstate *page_hstate(struct page *page) return NULL; } +static inline struct hstate *size_to_hstate(unsigned long size) +{ + return NULL; +} + static inline unsigned long huge_page_size(struct hstate *h) { return PAGE_SIZE; diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 0d894a2bfaa1..0c838ba1a8ee 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -201,11 +202,13 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) -/* Look for migarion entries rather than present PTEs */ +/* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) struct page_vma_mapped_walk { - struct page *page; + unsigned long pfn; + unsigned long nr_pages; + pgoff_t pgoff; struct vm_area_struct *vma; unsigned long address; pmd_t *pmd; @@ -216,7 +219,9 @@ struct page_vma_mapped_walk { #define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ - .page = _page, \ + .pfn = page_to_pfn(_page), \ + .nr_pages = compound_nr(page), \ + .pgoff = page_to_pgoff(page), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ @@ -224,7 +229,9 @@ struct page_vma_mapped_walk { #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ - .page = &_folio->page, \ + .pfn = folio_pfn(_folio), \ + .nr_pages = folio_nr_pages(_folio), \ + .pgoff = folio_pgoff(_folio), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ @@ -233,7 +240,7 @@ struct page_vma_mapped_walk { static inline void 
page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) { /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */ - if (pvmw->pte && !PageHuge(pvmw->page)) + if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma)) pte_unmap(pvmw->pte); if (pvmw->ptl) spin_unlock(pvmw->ptl); -- cgit v1.2.3 From b3ac04132c4b9bc5c9c14608424d410e7ca3b400 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 21 Jan 2022 11:27:31 -0500 Subject: mm/rmap: Turn page_referenced() into folio_referenced() Both its callers pass a page which was previously on an LRU list, so were passing a folio by definition. Use the type system to enforce that and remove a few calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 0c838ba1a8ee..69a1664216de 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -190,7 +190,7 @@ static inline void page_dup_rmap(struct page *page, bool compound) /* * Called from mm/vmscan.c to handle paging out */ -int page_referenced(struct page *, int is_locked, +int folio_referenced(struct folio *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); void try_to_migrate(struct page *page, enum ttu_flags flags); @@ -301,7 +301,7 @@ void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #define anon_vma_prepare(vma) (0) #define anon_vma_link(vma) do {} while (0) -static inline int page_referenced(struct page *page, int is_locked, +static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { -- cgit v1.2.3 From af28a988b313a601c12c410a42e485ca46adcfee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 21 Jan 2022 10:44:52 -0500 Subject: mm/huge_memory: Convert __split_huge_pmd() to take a folio Convert split_huge_pmd_address() at the same time since it only passes the folio through, and its two callers already have a folio on hand. Removes numerous calls to compound_head() and removes an assumption that a page cannot be larger than a PMD. 
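A minimal sketch of what the conversion means for a caller (illustrative only; the function and variable names below are hypothetical, not taken from the patch):

#include <linux/huge_mm.h>

static void example_freeze_pmd(struct vm_area_struct *vma, pmd_t *pmd,
			       unsigned long addr, struct folio *folio)
{
	/* Before: __split_huge_pmd(vma, pmd, addr, true, &folio->page); */
	__split_huge_pmd(vma, pmd, addr, true, folio);
}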
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/huge_mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 71c073d411ac..4368b314d9c8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -194,7 +194,7 @@ static inline int split_huge_page(struct page *page) void deferred_split_huge_page(struct page *page); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct page *page); + unsigned long address, bool freeze, struct folio *folio); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ @@ -207,7 +207,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, - bool freeze, struct page *page); + bool freeze, struct folio *folio); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); @@ -406,9 +406,9 @@ static inline void deferred_split_huge_page(struct page *page) {} do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct page *page) {} + unsigned long address, bool freeze, struct folio *folio) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address, bool freeze, struct page *page) {} + unsigned long address, bool freeze, struct folio *folio) {} #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) -- cgit v1.2.3 From 869f7ee6f6477341f859c8b0949ae81caf9ca7f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Feb 2022 09:28:49 -0500 Subject: mm/rmap: Convert try_to_unmap() to take a folio Change all three callers and the worker function try_to_unmap_one(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 69a1664216de..a0c5c38c733f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -194,7 +194,7 @@ int folio_referenced(struct folio *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); void try_to_migrate(struct page *page, enum ttu_flags flags); -void try_to_unmap(struct page *, enum ttu_flags flags); +void try_to_unmap(struct folio *, enum ttu_flags flags); int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct page **pages, @@ -309,7 +309,7 @@ static inline int folio_referenced(struct folio *folio, int is_locked, return 0; } -static inline void try_to_unmap(struct page *page, enum ttu_flags flags) +static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags) { } -- cgit v1.2.3 From 4b8554c527f3cfa183f6c06d231a9387873205a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 28 Jan 2022 14:29:43 -0500 Subject: mm/rmap: Convert try_to_migrate() to folios Convert the callers to pass a folio and the try_to_migrate_one() worker to use a folio throughout. Fixes an assumption that a folio must be <= PMD size. 
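For callers that still hold a struct page, the conversion typically looks like the following hedged sketch (not taken from the patch; the function name is hypothetical):

#include <linux/rmap.h>

static void example_unmap_for_migration(struct page *page)
{
	struct folio *folio = page_folio(page);

	/* Before: try_to_migrate(page, 0);  -- 0 means no special TTU flags */
	try_to_migrate(folio, 0);
}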
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a0c5c38c733f..e6e935c81414 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -193,7 +193,7 @@ static inline void page_dup_rmap(struct page *page, bool compound) int folio_referenced(struct folio *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -void try_to_migrate(struct page *page, enum ttu_flags flags); +void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, -- cgit v1.2.3 From 4eecb8b9163df82c87c91764a02fff228ef25f6d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 28 Jan 2022 23:32:59 -0500 Subject: mm/migrate: Convert remove_migration_ptes() to folios Convert the implementation and all callers. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e6e935c81414..21af80d5b711 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -261,7 +261,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int folio_mkclean(struct folio *); -void remove_migration_ptes(struct page *old, struct page *new, bool locked); +void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); /* * Called by memory-failure.c to kill processes. -- cgit v1.2.3 From 9595d76942b8714627d670a7e7ae543812c731ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 1 Feb 2022 23:33:08 -0500 Subject: mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read() Add back page_lock_anon_vma_read() as a wrapper. This saves a few calls to compound_head(). If any callers were passing a tail page before, this would have failed to lock the anon VMA as page->mapping is not valid for tail pages. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/rmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 21af80d5b711..be020d38b0a5 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -267,6 +267,7 @@ void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); * Called by memory-failure.c to kill processes. */ struct anon_vma *page_lock_anon_vma_read(struct page *page); +struct anon_vma *folio_lock_anon_vma_read(struct folio *folio); void page_unlock_anon_vma_read(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); -- cgit v1.2.3 From e05b34539d008ab819388f699b25eae962ba24ac Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 29 Jan 2022 11:52:52 -0500 Subject: mm: Turn page_anon_vma() into folio_anon_vma() Move the prototype from mm.h to mm/internal.h and convert all callers to pass a folio. 
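Sketch of the effect on a caller inside mm/ (hedged illustration; the helper shown is hypothetical): the prototype now comes from mm/internal.h rather than mm.h, and a folio is passed directly.

#include "internal.h"	/* folio_anon_vma() is now declared here, mm/ only */

static bool example_folio_is_anon_mapped(struct folio *folio)
{
	/* Before: struct anon_vma *av = page_anon_vma(&folio->page); */
	struct anon_vma *av = folio_anon_vma(folio);

	return av != NULL;
}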
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d380dc26847..a879c583f665 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1730,7 +1730,6 @@ static inline void *folio_address(const struct folio *folio) } extern void *page_rmapping(struct page *page); -extern struct anon_vma *page_anon_vma(struct page *page); extern pgoff_t __page_file_index(struct page *page); /* -- cgit v1.2.3 From 2f031c6f042cb8a9b221a8b6b80e69de5170f830 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 29 Jan 2022 16:06:53 -0500 Subject: mm/rmap: Convert rmap_walk() to take a folio This ripples all the way through to every calling and called function from rmap. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/ksm.h | 4 ++-- include/linux/rmap.h | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index a38a5bca1ba5..0b4f17418f64 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -51,7 +51,7 @@ static inline void ksm_exit(struct mm_struct *mm) struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); -void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -78,7 +78,7 @@ static inline struct page *ksm_might_need_to_copy(struct page *page, return page; } -static inline void rmap_walk_ksm(struct page *page, +static inline void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc) { } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index be020d38b0a5..a74d811c5b3f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -266,7 +266,6 @@ void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); /* * Called by memory-failure.c to kill processes. */ -struct anon_vma *page_lock_anon_vma_read(struct page *page); struct anon_vma *folio_lock_anon_vma_read(struct folio *folio); void page_unlock_anon_vma_read(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); @@ -286,15 +285,15 @@ struct rmap_walk_control { * Return false if page table scanning in rmap_walk should be stopped. * Otherwise, return true. 
*/ - bool (*rmap_one)(struct page *page, struct vm_area_struct *vma, + bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); - int (*done)(struct page *page); - struct anon_vma *(*anon_lock)(struct page *page); + int (*done)(struct folio *folio); + struct anon_vma *(*anon_lock)(struct folio *folio); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -void rmap_walk(struct page *page, struct rmap_walk_control *rwc); -void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); +void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From 84fbbe21894bb9be8e16df408cbfbb9466fe396d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 29 Jan 2022 16:16:54 -0500 Subject: mm/rmap: Constify the rmap_walk_control argument The rmap walking functions do not modify the rmap_walk_control, and page_idle_clear_pte_refs() takes advantage of that to move construction of the rmap_walk_control to compile time. This lets us remove an unclean cast. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/ksm.h | 4 ++-- include/linux/rmap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 0b4f17418f64..0630e545f4cb 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -51,7 +51,7 @@ static inline void ksm_exit(struct mm_struct *mm) struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); -void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); +void rmap_walk_ksm(struct folio *folio, const struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -79,7 +79,7 @@ static inline struct page *ksm_might_need_to_copy(struct page *page, } static inline void rmap_walk_ksm(struct folio *folio, - struct rmap_walk_control *rwc) + const struct rmap_walk_control *rwc) { } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a74d811c5b3f..17230c458341 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -292,8 +292,8 @@ struct rmap_walk_control { bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); -void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); +void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc); +void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From e0cd5e7ffa549487cf1a85452f371274cbf0a8f1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Jan 2022 23:35:57 -0500 Subject: mm/vmscan: Convert pageout() to take a folio We always write out an entire folio at once. This conversion removes a few calls to compound_head() and gets the NR_VMSCAN_WRITE statistic right when writing out a large folio. 
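The tracepoint rename in the hunk below is the externally visible part of this change; an in-tree caller would now do something along these lines (illustrative sketch only, hypothetical function name):

#include <trace/events/vmscan.h>

static void example_trace_writeback(struct folio *folio)
{
	/* Before: trace_mm_vmscan_writepage(&folio->page); */
	trace_mm_vmscan_write_folio(folio);
}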
Signed-off-by: Matthew Wilcox (Oracle) --- include/trace/events/vmscan.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ca2e9009a651..de136dbd623a 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -327,11 +327,11 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __print_symbolic(__entry->lru, LRU_NAMES)) ); -TRACE_EVENT(mm_vmscan_writepage, +TRACE_EVENT(mm_vmscan_write_folio, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( __field(unsigned long, pfn) @@ -339,9 +339,9 @@ TRACE_EVENT(mm_vmscan_writepage, ), TP_fast_assign( - __entry->pfn = page_to_pfn(page); + __entry->pfn = folio_pfn(folio); __entry->reclaim_flags = trace_reclaim_flags( - page_is_file_lru(page)); + folio_is_file_lru(folio)); ), TP_printk("page=%p pfn=0x%lx flags=%s", -- cgit v1.2.3 From d4b4084ac3154c51ff5fa71f669264cc44429be2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 4 Feb 2022 14:13:31 -0500 Subject: mm: Turn can_split_huge_page() into can_split_folio() This function already required a head page to be passed, so this just adds type-safety and removes a few implicit calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4368b314d9c8..e0348bca3d66 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -185,7 +185,7 @@ void prep_transhuge_page(struct page *page); void free_transhuge_page(struct page *page); bool is_transparent_hugepage(struct page *page); -bool can_split_huge_page(struct page *page, int *pextra_pins); +bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { @@ -387,7 +387,7 @@ static inline bool is_transparent_hugepage(struct page *page) #define thp_get_unmapped_area NULL static inline bool -can_split_huge_page(struct page *page, int *pextra_pins) +can_split_folio(struct folio *folio, int *pextra_pins) { BUILD_BUG(); return false; -- cgit v1.2.3 From 06d44142d49dc2e02d255ea9d72dc4c20f20388f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 10 Oct 2020 11:47:55 -0400 Subject: mm: Fix READ_ONLY_THP warning These counters only exist if CONFIG_READ_ONLY_THP_FOR_FS is defined, but we do not need to warn if the filesystem natively supports large folios. 
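For context, a filesystem that handles large folios natively opts in at inode set-up time, which is what keeps the relaxed warning below quiet for it. This is a hedged sketch, not part of the patch, and the helper name is hypothetical:

#include <linux/fs.h>
#include <linux/pagemap.h>

static void example_fs_init_mapping(struct inode *inode)
{
	/* Declare native large folio support for this mapping. */
	mapping_set_large_folios(inode->i_mapping);
}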
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a73c928e1d74..0a2417fc531c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -326,7 +326,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping) if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else - WARN_ON_ONCE(1); + WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } @@ -336,7 +336,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else - WARN_ON_ONCE(1); + WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } -- cgit v1.2.3 From 421f1ab48452af48b64e205de1caca3d1ba415f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 15 Jan 2022 23:27:08 -0500 Subject: mm: Make large folios depend on THP Some parts of the VM still depend on THP to handle large folios correctly. Until those are fixed, prevent creating large folios if THP are disabled. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2417fc531c..20d7cbabf654 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -306,9 +306,14 @@ static inline void mapping_set_large_folios(struct address_space *mapping) __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } +/* + * Large folio support currently depends on THP. These dependencies are + * being worked on but are not yet fixed. + */ static inline bool mapping_large_folio_support(struct address_space *mapping) { - return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) -- cgit v1.2.3 From 18788cfa236967741b83db1035ab24539e2a21bb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 May 2020 20:54:38 -0400 Subject: mm: Support arbitrary THP sizes For code which has not yet been converted from THP to folios, use the compound size of the page instead of assuming PTE or PMD size. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/huge_mm.h | 47 ----------------------------------------------- include/linux/mm.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e0348bca3d66..0734aff8fa19 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -250,30 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } -/** - * thp_order - Order of a transparent huge page. - * @page: Head page of a transparent huge page. - */ -static inline unsigned int thp_order(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - if (PageHead(page)) - return HPAGE_PMD_ORDER; - return 0; -} - -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - if (PageHead(page)) - return HPAGE_PMD_NR; - return 1; -} - /** * folio_test_pmd_mappable - Can we map this folio with a PMD? 
* @folio: The folio to test @@ -336,18 +312,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline unsigned int thp_order(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return 0; -} - -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return 1; -} - static inline bool folio_test_pmd_mappable(struct folio *folio) { return false; @@ -489,15 +453,4 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } -/** - * thp_size - Size of a transparent huge page. - * @page: Head page of a transparent huge page. - * - * Return: Number of bytes in this page. - */ -static inline unsigned long thp_size(struct page *page) -{ - return PAGE_SIZE << thp_order(page); -} - #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index a879c583f665..c1966ad34142 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -939,6 +939,37 @@ static inline unsigned int page_shift(struct page *page) return PAGE_SHIFT + compound_order(page); } +/** + * thp_order - Order of a transparent huge page. + * @page: Head page of a transparent huge page. + */ +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return compound_order(page); +} + +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return compound_nr(page); +} + +/** + * thp_size - Size of a transparent huge page. + * @page: Head page of a transparent huge page. + * + * Return: Number of bytes in this page. + */ +static inline unsigned long thp_size(struct page *page) +{ + return PAGE_SIZE << thp_order(page); +} + void free_compound_page(struct page *page); #ifdef CONFIG_MMU -- cgit v1.2.3 From 351bdbb6419ca988802882badadc321d384d0254 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 21 Mar 2022 10:22:37 +0100 Subject: net: Revert the softirq will run annotation in ____napi_schedule(). The lockdep annotation lockdep_assert_softirq_will_run() expects that either hard or soft interrupts are disabled because both guaranty that the "raised" soft-interrupts will be processed once the context is left. This triggers in flush_smp_call_function_from_idle() but it this case it explicitly calls do_softirq() in case of pending softirqs. Revert the "softirq will run" annotation in ____napi_schedule() and move the check back to __netif_rx() as it was. Keep the IRQ-off assert in ____napi_schedule() because this is always required. Fixes: fbd9a2ceba5c7 ("net: Add lockdep asserts to ____napi_schedule().") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Jason A. 
Donenfeld Link: https://lore.kernel.org/r/YjhD3ZKWysyw8rc6@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/lockdep.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0cc65d216701..467b94257105 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -329,12 +329,6 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_assert_none_held_once() \ lockdep_assert_once(!current->lockdep_depth) -/* - * Ensure that softirq is handled within the callchain and not delayed and - * handled by chance. - */ -#define lockdep_assert_softirq_will_run() \ - lockdep_assert_once(hardirq_count() | softirq_count()) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) @@ -420,7 +414,6 @@ extern int lockdep_is_held(const void *); #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) #define lockdep_assert_none_held_once() do { } while (0) -#define lockdep_assert_softirq_will_run() do { } while (0) #define lockdep_recursing(tsk) (0) -- cgit v1.2.3 From f5bfa23f576fdcc8e0b7fbff44cf70bd69ff9bdb Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:54 -0800 Subject: RISC-V: Add a perf core library for pmu drivers Implement a perf core library that can support all the essential perf features in future. It can also accommodate any type of PMU implementation in future. Currently, both SBI based perf driver and legacy driver implemented uses the library. Most of the common perf functionalities are kept in this core library wile PMU specific driver can implement PMU specific features. For example, the SBI specific functionality will be implemented in the SBI specific driver. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- include/linux/perf/riscv_pmu.h | 65 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 include/linux/perf/riscv_pmu.h (limited to 'include') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h new file mode 100644 index 000000000000..0d8979765d79 --- /dev/null +++ b/include/linux/perf/riscv_pmu.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 SiFive + * Copyright (C) 2018 Andes Technology Corporation + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + */ + +#ifndef _ASM_RISCV_PERF_EVENT_H +#define _ASM_RISCV_PERF_EVENT_H + +#include +#include +#include + +#ifdef CONFIG_RISCV_PMU + +/* + * The RISCV_MAX_COUNTERS parameter should be specified. 
+ */ + +#define RISCV_MAX_COUNTERS 64 +#define RISCV_OP_UNSUPP (-EOPNOTSUPP) +#define RISCV_PMU_PDEV_NAME "riscv-pmu" + +#define RISCV_PMU_STOP_FLAG_RESET 1 + +struct cpu_hw_events { + /* currently enabled events */ + int n_events; + /* currently enabled events */ + struct perf_event *events[RISCV_MAX_COUNTERS]; + /* currently enabled counters */ + DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS); +}; + +struct riscv_pmu { + struct pmu pmu; + char *name; + + irqreturn_t (*handle_irq)(int irq_num, void *dev); + + int num_counters; + u64 (*ctr_read)(struct perf_event *event); + int (*ctr_get_idx)(struct perf_event *event); + int (*ctr_get_width)(int idx); + void (*ctr_clear_idx)(struct perf_event *event); + void (*ctr_start)(struct perf_event *event, u64 init_val); + void (*ctr_stop)(struct perf_event *event, unsigned long flag); + int (*event_map)(struct perf_event *event, u64 *config); + + struct cpu_hw_events __percpu *hw_events; + struct hlist_node node; +}; + +#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); +int riscv_pmu_event_set_period(struct perf_event *event); +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); +u64 riscv_pmu_event_update(struct perf_event *event); +struct riscv_pmu *riscv_pmu_alloc(void); + +#endif /* CONFIG_RISCV_PMU */ + +#endif /* _ASM_RISCV_PERF_EVENT_H */ -- cgit v1.2.3 From 9b3e150e310ee71d7bae1e31c38a300cfa5e951b Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:55 -0800 Subject: RISC-V: Add a simple platform driver for RISC-V legacy perf The old RISC-V perf implementation allowed counting of only cycle/instruction counters using perf. Restore that feature by implementing a simple platform driver under a separate config to provide backward compatibility. Any existing software stack will continue to work as it is. However, it provides an easy way out in future where we can remove the legacy driver. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- include/linux/perf/riscv_pmu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 0d8979765d79..9140c491fc54 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -22,6 +22,7 @@ #define RISCV_MAX_COUNTERS 64 #define RISCV_OP_UNSUPP (-EOPNOTSUPP) #define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" #define RISCV_PMU_STOP_FLAG_RESET 1 @@ -58,6 +59,11 @@ unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); u64 riscv_pmu_event_update(struct perf_event *event); +#ifdef CONFIG_RISCV_PMU_LEGACY +void riscv_pmu_legacy_skip_init(void); +#else +static inline void riscv_pmu_legacy_skip_init(void) {}; +#endif struct riscv_pmu *riscv_pmu_alloc(void); #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From e9991434596f5373dfd75857b445eb92a9253c56 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:57 -0800 Subject: RISC-V: Add perf platform driver based on SBI PMU extension RISC-V SBI specification added a PMU extension that allows to configure start/stop any pmu counter. The RISC-V perf can use most of the generic perf features except interrupt overflow and event filtering based on privilege mode which will be added in future. 
It also allows to monitor a handful of firmware counters that can provide insights into firmware activity during a performance analysis. Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- include/linux/cpuhotplug.h | 1 + include/linux/perf/riscv_pmu.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..51c7e4929df7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -165,6 +165,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, + CPUHP_AP_PERF_RISCV_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 9140c491fc54..0f226948c0ca 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -31,8 +31,10 @@ struct cpu_hw_events { int n_events; /* currently enabled events */ struct perf_event *events[RISCV_MAX_COUNTERS]; - /* currently enabled counters */ - DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled hardware counters */ + DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled firmware counters */ + DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS); }; struct riscv_pmu { -- cgit v1.2.3 From 4905ec2fb7e6421c14c9fb7276f5aa92f60f2b98 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:58 -0800 Subject: RISC-V: Add sscofpmf extension support The sscofpmf extension allows counter overflow and filtering for programmable counters. Enable the perf driver to handle the overflow interrupt. The overflow interrupt is a hart local interrupt. Thus, per cpu overflow interrupts are setup as a child under the root INTC irq domain. Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- include/linux/perf/riscv_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 0f226948c0ca..46f9b6fe306e 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -29,6 +29,8 @@ struct cpu_hw_events { /* currently enabled events */ int n_events; + /* Counter overflow interrupt */ + int irq; /* currently enabled events */ struct perf_event *events[RISCV_MAX_COUNTERS]; /* currently enabled hardware counters */ -- cgit v1.2.3 From 863a66cdb4df25fd146d9851c3289072298566d5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Mar 2022 09:27:08 +0800 Subject: lib/sbitmap: allocate sb->map via kvzalloc_node sbitmap has been used in scsi for replacing atomic operations on sdev->device_busy, so IOPS on some fast scsi storage can be improved. However, sdev->device_busy can be changed in fast path, so we have to allocate the sb->map statically. sdev->device_busy has been capped to 1024, but some drivers may configure the default depth as < 8, then cause each sbitmap word to hold only one bit. Finally 1024 * 128( sizeof(sbitmap_word)) bytes is needed for sb->map, given it is order 5 allocation, sometimes it may fail. Avoid the issue by using kvzalloc_node() for allocating sb->map. Cc: Ewan D. 
Milne Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220316012708.354668-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index dffeb8281c2d..8f5a86e210b9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -174,7 +174,7 @@ static inline unsigned int __map_depth(const struct sbitmap *sb, int index) static inline void sbitmap_free(struct sbitmap *sb) { free_percpu(sb->alloc_hint); - kfree(sb->map); + kvfree(sb->map); sb->map = NULL; } -- cgit v1.2.3 From 4a0cb83ba6e0cd73a50fa4f84736846bf0029f2b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 21 Mar 2022 22:10:53 -0700 Subject: netdevice: add missing dm_private kdoc Building htmldocs complains: include/linux/netdevice.h:2295: warning: Function parameter or member 'dm_private' not described in 'net_device' Fixes: b26ef81c46ed ("drop_monitor: remove quadratic behavior") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220322051053.1883186-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e01a8ce7181f..cd7a597c55b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1899,6 +1899,8 @@ enum netdev_ml_priv_type { * @garp_port: GARP * @mrp_port: MRP * + * @dm_private: Drop monitor private + * * @dev: Class/net/name entry * @sysfs_groups: Space for optional device, statistics and wireless * sysfs groups -- cgit v1.2.3 From 0313bc278dac7cd9ce83a8d384581dc043156965 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 22 Mar 2022 09:17:20 -0700 Subject: Revert "random: block in /dev/urandom" This reverts commit 6f98a4bfee72c22f50aedb39fb761567969865fe. It turns out we still can't do this. Way too many platforms that don't have any real source of randomness at boot and no jitter entropy because they don't even have a cycle counter. As reported by Guenter Roeck: "This causes a large number of qemu boot test failures for various architectures (arm, m68k, microblaze, sparc32, xtensa are the ones I observed). Common denominator is that boot hangs at 'Saving random seed:'" This isn't hugely unexpected - we tried it, it failed, so now we'll revert it. Link: https://lore.kernel.org/all/20220322155820.GA1745955@roeck-us.net/ Reported-and-bisected-by: Guenter Roeck Cc: Jason Donenfeld Signed-off-by: Linus Torvalds --- include/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index c0baffe7afb1..f673fbb838b3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -48,7 +48,7 @@ extern int unregister_random_ready_notifier(struct notifier_block *nb); extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE -extern const struct file_operations random_fops; +extern const struct file_operations random_fops, urandom_fops; #endif u32 get_random_u32(void); -- cgit v1.2.3 From 89f42494f92f448747bd8a7ab1ae8b5d5520577d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Mar 2022 19:10:43 -0400 Subject: SUNRPC: Don't call connect() more than once on a TCP socket Avoid socket state races due to repeated calls to ->connect() using the same socket. 
If connect() returns 0 due to the connection having completed, but we are in fact in a closing state, then we may leave the XPRT_CONNECTING flag set on the transport. Reported-by: Enrico Scholz Fixes: 3be232f11a3c ("SUNRPC: Prevent immediate close+reconnect") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 8c2a712cb242..689062afdd61 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -89,5 +89,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3 From 2790a624d43084de590884934969e19c7a82316a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Mar 2022 08:12:40 -0400 Subject: SUNRPC: Replace internal use of SOCKWQ_ASYNC_NOSPACE The socket's SOCKWQ_ASYNC_NOSPACE can be cleared by various actors in the socket layer, so replace it with our own flag in the transport sock_state field. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 689062afdd61..3eb0079669c5 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -90,5 +90,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) #define XPRT_SOCK_CONNECT_SENT (8) +#define XPRT_SOCK_NOSPACE (9) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3 From 33e5c765bc1ea5e06ea7603637f14d727e6fcdf3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Mar 2022 22:02:22 -0400 Subject: NFS: Fix memory allocation in rpc_malloc() When in a low memory situation, we do want rpciod to kick off direct reclaim in the case where that helps, however we don't want it looping forever in mempool_alloc(). So first try allocating from the slab using GFP_KERNEL | __GFP_NORETRY, and then fall back to a GFP_NOWAIT allocation from the mempool. Ditto for rpc_alloc_task() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 56710f8056d3..1d7a3e51b795 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -262,6 +262,7 @@ void rpc_destroy_mempool(void); extern struct workqueue_struct *rpciod_workqueue; extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); +gfp_t rpc_task_gfp_mask(void); static inline int rpc_wait_for_completion_task(struct rpc_task *task) { -- cgit v1.2.3 From 515dcdcd48736576c6f5c197814da6f81c60a21e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2022 12:34:19 -0400 Subject: NFS: nfsiod should not block forever in mempool_alloc() The concern is that since nfsiod is sometimes required to kick off a commit, it can get locked up waiting forever in mempool_alloc() instead of failing gracefully and leaving the commit until later. Try to allocate from the slab first, with GFP_KERNEL | __GFP_NORETRY, then fall back to a non-blocking attempt to allocate from the memory pool. 
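The allocation strategy described above follows a common pattern: an ordinary slab attempt that may enter direct reclaim but will not retry forever, then a non-sleeping fall-back to the reserved pool. A hedged sketch with placeholder cache, pool and structure names (not the real NFS symbols):

#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/string.h>

struct example_commit_data { int dummy; };	/* placeholder payload */

static struct kmem_cache *example_cachep;	/* placeholder slab cache */
static mempool_t *example_mempool;		/* placeholder mempool */

static struct example_commit_data *example_commitdata_alloc(void)
{
	struct example_commit_data *p;

	/* May kick off direct reclaim, but gives up instead of looping. */
	p = kmem_cache_zalloc(example_cachep, GFP_KERNEL | __GFP_NORETRY);
	if (!p) {
		/* Non-blocking last resort from the reserved pool. */
		p = mempool_alloc(example_mempool, GFP_NOWAIT);
		if (p)
			memset(p, 0, sizeof(*p));
	}
	return p;
}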
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c47c448befc8..db305abafc9e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -580,7 +580,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); +extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); -- cgit v1.2.3 From 92ee3c60ec9fe64404dc035e7c41277d74aa26cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Mar 2022 18:07:17 +0100 Subject: ALSA: pcm: Fix races among concurrent hw_params and hw_free calls Currently we have neither proper check nor protection against the concurrent calls of PCM hw_params and hw_free ioctls, which may result in a UAF. Since the existing PCM stream lock can't be used for protecting the whole ioctl operations, we need a new mutex to protect those racy calls. This patch introduced a new mutex, runtime->buffer_mutex, and applies it to both hw_params and hw_free ioctl code paths. Along with it, the both functions are slightly modified (the mmap_count check is moved into the state-check block) for code simplicity. Reported-by: Hu Jiahui Cc: Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20220322170720.3529-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 36da42cd0774..314f2779cab5 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -401,6 +401,7 @@ struct snd_pcm_runtime { wait_queue_head_t tsleep; /* transfer sleep */ struct fasync_struct *fasync; bool stop_operating; /* sync_stop will be called */ + struct mutex buffer_mutex; /* protect for buffer changes */ /* -- private section -- */ void *private_data; -- cgit v1.2.3 From 62eb29526b48d20704668a2fdf97a49d01bf52ce Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 22 Mar 2022 14:38:33 -0700 Subject: linux/kthread.h: remove unused macros Ever since these macros were introduced in commit b56c0d8937e6 ("kthread: implement kthread_worker"), there has been precisely one user (commit 4d115420707a, "NVMe: Async IO queue deletion"), and that user went away in 2016 with db3cbfff5bcc ("NVMe: IO queue deletion re-write"). Apart from being unused, these macros are also awkward to use (which may contribute to them not being used): Having a way to statically (or on-stack) allocating the storage for the struct kthread_worker itself doesn't help much, since obviously one needs to have some code for actually _spawning_ the worker thread, which must have error checking. And these days we have the kthread_create_worker() interface which both allocates the struct kthread_worker and spawns the kthread. Link: https://lkml.kernel.org/r/20220314145343.494694-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Tejun Heo Cc: "Eric W. 
Biederman" Cc: Petr Mladek Cc: David Hildenbrand Cc: Yafang Shao Cc: Cai Huoqing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 3df4ea04716f..de5d75bafd66 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -141,12 +141,6 @@ struct kthread_delayed_work { struct timer_list timer; }; -#define KTHREAD_WORKER_INIT(worker) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock), \ - .work_list = LIST_HEAD_INIT((worker).work_list), \ - .delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\ - } - #define KTHREAD_WORK_INIT(work, fn) { \ .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ @@ -158,9 +152,6 @@ struct kthread_delayed_work { TIMER_IRQSAFE), \ } -#define DEFINE_KTHREAD_WORKER(worker) \ - struct kthread_worker worker = KTHREAD_WORKER_INIT(worker) - #define DEFINE_KTHREAD_WORK(work, fn) \ struct kthread_work work = KTHREAD_WORK_INIT(work, fn) @@ -168,19 +159,6 @@ struct kthread_delayed_work { struct kthread_delayed_work dwork = \ KTHREAD_DELAYED_WORK_INIT(dwork, fn) -/* - * kthread_worker.lock needs its own lockdep class key when defined on - * stack with lockdep enabled. Use the following macros in such cases. - */ -#ifdef CONFIG_LOCKDEP -# define KTHREAD_WORKER_INIT_ONSTACK(worker) \ - ({ kthread_init_worker(&worker); worker; }) -# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ - struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) -#else -# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) -#endif - extern void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key); -- cgit v1.2.3 From bf507030f312c68fdbb17c2d33f317cda109a484 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:38:48 -0700 Subject: doc: convert 'subsection' to 'section' in gfp.h Patch series "Remove remaining parts of congestion tracking code", v2. This patch (of 11): Various DOC: sections in gfp.h have subsection headers (~~~) but the place where they are included in mm-api.rst does not have section, only chapters. So convert to section headers (---) to avoid confusion. Specifically if sections are added later in mm-api.rst, an error results. Link: https://lkml.kernel.org/r/164549971112.9187.16871723439770288255.stgit@noble.brown Link: https://lkml.kernel.org/r/164549983733.9187.17894407453436115822.stgit@noble.brown Signed-off-by: NeilBrown Cc: Jan Kara Cc: Wu Fengguang Cc: Jaegeuk Kim Cc: Chao Yu Cc: Jeff Layton Cc: Ilya Dryomov Cc: Miklos Szeredi Cc: Trond Myklebust Cc: Anna Schumaker Cc: Ryusuke Konishi Cc: Darrick J. Wong Cc: Philipp Reisner Cc: Lars Ellenberg Cc: Paolo Valente Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 80f63c862be5..20f6fbe12993 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -79,7 +79,7 @@ struct vm_area_struct; * DOC: Page mobility and placement hints * * Page mobility and placement hints - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * --------------------------------- * * These flags provide hints about how mobile the page is. 
Pages with similar * mobility are placed within the same pageblocks to minimise problems due @@ -112,7 +112,7 @@ struct vm_area_struct; * DOC: Watermark modifiers * * Watermark modifiers -- controls access to emergency reserves - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * ------------------------------------------------------------ * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. @@ -144,7 +144,7 @@ struct vm_area_struct; * DOC: Reclaim modifiers * * Reclaim modifiers - * ~~~~~~~~~~~~~~~~~ + * ----------------- * Please note that all the following flags are only applicable to sleepable * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). * @@ -224,7 +224,7 @@ struct vm_area_struct; * DOC: Action modifiers * * Action modifiers - * ~~~~~~~~~~~~~~~~ + * ---------------- * * %__GFP_NOWARN suppresses allocation failure reports. * @@ -256,7 +256,7 @@ struct vm_area_struct; * DOC: Useful GFP flag combinations * * Useful GFP flag combinations - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * ---------------------------- * * Useful GFP flag combinations that are commonly used. It is recommended * that subsystems start with one of these combinations and then set/clear -- cgit v1.2.3 From 84dacdbd5352bfef82423760fa2e8bffaeef9e05 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:38:51 -0700 Subject: mm: document and polish read-ahead code Add some "big-picture" documentation for read-ahead and polish the code to make it fit this documentation. The meaning of ->async_size is clarified to match its name. i.e. Any request to ->readahead() has a sync part and an async part. The caller will wait for the sync pages to complete, but will not wait for the async pages. The first async page is still marked PG_readahead Note that the current function names page_cache_sync_ra() and page_cache_async_ra() are misleading. All ra request are partly sync and partly async, so either part can be empty. A page_cache_sync_ra() request will usually set ->async_size non-zero, implying it is not all synchronous. When a non-zero req_count is passed to page_cache_async_ra(), the implication is that some prefix of the request is synchronous, though the calculation made there is incorrect - I haven't tried to fix it. Link: https://lkml.kernel.org/r/164549983734.9187.11586890887006601405.stgit@noble.brown Signed-off-by: NeilBrown Cc: Anna Schumaker Cc: Chao Yu Cc: Darrick J. Wong Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jeff Layton Cc: Jens Axboe Cc: Lars Ellenberg Cc: Miklos Szeredi Cc: Paolo Valente Cc: Philipp Reisner Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2d892b201b0..8b5c486bd4a2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -930,10 +930,15 @@ struct fown_struct { * struct file_ra_state - Track a file's readahead state. * @start: Where the most recent readahead started. * @size: Number of pages read in the most recent readahead. - * @async_size: Start next readahead when this many pages are left. - * @ra_pages: Maximum size of a readahead request. + * @async_size: Numer of pages that were/are not needed immediately + * and so were/are genuinely "ahead". Start next readahead when + * the first of these pages is accessed. 
+ * @ra_pages: Maximum size of a readahead request, copied from the bdi. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. + * + * When this structure is passed to ->readahead(), the "most recent" + * readahead means the current readahead. */ struct file_ra_state { pgoff_t start; -- cgit v1.2.3 From 6df25e58532be7a4cd6fb15bcd85805947402d91 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:39:01 -0700 Subject: nfs: remove reliance on bdi congestion The bdi congestion tracking in not widely used and will be removed. NFS is one of a small number of filesystems that uses it, setting just the async (write) congestion flag at what it determines are appropriate times. The only remaining effect of the async flag is to cause (some) WB_SYNC_NONE writes to be skipped. So instead of setting the flag, set an internal flag and change: - .writepages to do nothing if WB_SYNC_NONE and the flag is set - .writepage to return AOP_WRITEPAGE_ACTIVATE if WB_SYNC_NONE and the flag is set. The writepages change causes a behavioural change in that pageout() can now return PAGE_ACTIVATE instead of PAGE_KEEP, so SetPageActive() will be called on the page which (I think) wil further delay the next attempt at writeout. This might be a good thing. Link: https://lkml.kernel.org/r/164549983738.9187.3972219847989393182.stgit@noble.brown Signed-off-by: NeilBrown Cc: Anna Schumaker Cc: Chao Yu Cc: Darrick J. Wong Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jeff Layton Cc: Jens Axboe Cc: Lars Ellenberg Cc: Miklos Szeredi Cc: Paolo Valente Cc: Philipp Reisner Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ca0959e51e81..6aa2a200676a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -138,6 +138,7 @@ struct nfs_server { struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ atomic_long_t writeback; /* number of writeback pages */ + unsigned int write_congested;/* flag set when writeback gets too high */ unsigned int flags; /* various flags */ /* The following are for internal use only. Also see uapi/linux/nfs_mount.h */ -- cgit v1.2.3 From fe55d563d4174f13839a9b7ef7309da5031b5d93 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:39:07 -0700 Subject: remove inode_congested() inode_congested() reports if the backing-device for the inode is congested. No bdi reports congestion any more, so this always returns 'false'. So remove inode_congested() and related functions, and remove the call sites, assuming that inode_congested() always returns 'false'. Link: https://lkml.kernel.org/r/164549983741.9187.2174285592262191311.stgit@noble.brown Signed-off-by: NeilBrown Cc: Anna Schumaker Cc: Chao Yu Cc: Darrick J. 
Wong Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jeff Layton Cc: Jens Axboe Cc: Lars Ellenberg Cc: Miklos Szeredi Cc: Paolo Valente Cc: Philipp Reisner Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 483979c1b9f4..860b675c2929 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -162,7 +162,6 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct blkcg *blkcg); -int inode_congested(struct inode *inode, int cong_bits); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode @@ -390,29 +389,8 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } -static inline int inode_congested(struct inode *inode, int cong_bits) -{ - return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); -} - #endif /* CONFIG_CGROUP_WRITEBACK */ -static inline int inode_read_congested(struct inode *inode) -{ - return inode_congested(inode, 1 << WB_sync_congested); -} - -static inline int inode_write_congested(struct inode *inode) -{ - return inode_congested(inode, 1 << WB_async_congested); -} - -static inline int inode_rw_congested(struct inode *inode) -{ - return inode_congested(inode, (1 << WB_sync_congested) | - (1 << WB_async_congested)); -} - static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) { return wb_congested(&bdi->wb, cong_bits); -- cgit v1.2.3 From b9b1335e640308acc1b8f26c739b804c80a6c147 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:39:10 -0700 Subject: remove bdi_congested() and wb_congested() and related functions These functions are no longer useful as no BDIs report congestions any more. Removing the test on bdi_write_contested() in current_may_throttle() could cause a small change in behaviour, but only when PF_LOCAL_THROTTLE is set. So replace the calls by 'false' and simplify the code - and remove the functions. [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/164549983742.9187.2570198746005819592.stgit@noble.brown Signed-off-by: NeilBrown Acked-by: Ryusuke Konishi [nilfs] Cc: Anna Schumaker Cc: Chao Yu Cc: Darrick J. 
Wong Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jeff Layton Cc: Jens Axboe Cc: Lars Ellenberg Cc: Miklos Szeredi Cc: Paolo Valente Cc: Philipp Reisner Cc: Trond Myklebust Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 860b675c2929..2d764566280c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -135,11 +135,6 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) struct backing_dev_info *inode_to_bdi(struct inode *inode); -static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) -{ - return wb->congested & cong_bits; -} - long congestion_wait(int sync, long timeout); static inline bool mapping_can_writeback(struct address_space *mapping) @@ -391,27 +386,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) #endif /* CONFIG_CGROUP_WRITEBACK */ -static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) -{ - return wb_congested(&bdi->wb, cong_bits); -} - -static inline int bdi_read_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_sync_congested); -} - -static inline int bdi_write_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_async_congested); -} - -static inline int bdi_rw_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, (1 << WB_sync_congested) | - (1 << WB_async_congested)); -} - const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ -- cgit v1.2.3 From a88f2096d5a2d91179db5dd9aa8f60dc3df9bb3e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 Mar 2022 14:39:19 -0700 Subject: remove congestion tracking framework This framework is no longer used - so discard it. Link: https://lkml.kernel.org/r/164549983747.9187.6171768583526866601.stgit@noble.brown Signed-off-by: NeilBrown Cc: Anna Schumaker Cc: Chao Yu Cc: Darrick J. 
Wong Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jan Kara Cc: Jeff Layton Cc: Jens Axboe Cc: Lars Ellenberg Cc: Miklos Szeredi Cc: Paolo Valente Cc: Philipp Reisner Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev-defs.h | 8 -------- include/linux/backing-dev.h | 2 -- include/trace/events/writeback.h | 28 ---------------------------- 3 files changed, 38 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 993c5628a726..e863c88df95f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -207,14 +207,6 @@ struct backing_dev_info { #endif }; -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; - -void clear_bdi_congested(struct backing_dev_info *bdi, int sync); -void set_bdi_congested(struct backing_dev_info *bdi, int sync); - struct wb_lock_cookie { bool locked; unsigned long flags; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2d764566280c..87ce24d238f3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -135,8 +135,6 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) struct backing_dev_info *inode_to_bdi(struct inode *inode); -long congestion_wait(int sync, long timeout); - static inline bool mapping_can_writeback(struct address_space *mapping) { return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index a345b1e12daf..86b2a82da546 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -735,34 +735,6 @@ TRACE_EVENT(writeback_sb_inodes_requeue, ) ); -DECLARE_EVENT_CLASS(writeback_congest_waited_template, - - TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), - - TP_ARGS(usec_timeout, usec_delayed), - - TP_STRUCT__entry( - __field( unsigned int, usec_timeout ) - __field( unsigned int, usec_delayed ) - ), - - TP_fast_assign( - __entry->usec_timeout = usec_timeout; - __entry->usec_delayed = usec_delayed; - ), - - TP_printk("usec_timeout=%u usec_delayed=%u", - __entry->usec_timeout, - __entry->usec_delayed) -); - -DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait, - - TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), - - TP_ARGS(usec_timeout, usec_delayed) -); - DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, -- cgit v1.2.3 From a128b054ce029554a4a52fc3abb8c1df8bafcaef Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Tue, 22 Mar 2022 14:39:22 -0700 Subject: mount: warn only once about timestamp range expiration Commit f8b92ba67c5d ("mount: Add mount warning for impending timestamp expiry") introduced a mount warning regarding filesystem timestamp limits, that is printed upon each writable mount or remount. This can result in a lot of unnecessary messages in the kernel log in setups where filesystems are being frequently remounted (or mounted multiple times). Avoid this by setting a superblock flag which indicates that the warning has been emitted at least once for any particular mount, as suggested in [1]. Link: https://lore.kernel.org/CAHk-=wim6VGnxQmjfK_tDg6fbHYKL4EFkmnTjVr9QnRqjDBAeA@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20220119202934.26495-1-ailiop@suse.com Signed-off-by: Anthony Iliopoulos Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Reviewed-by: Darrick J. 
Wong Cc: Alexander Viro Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b5c486bd4a2..ca9445f6cf3d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1440,6 +1440,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ +#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ /* Possible states of 'frozen' field */ enum { -- cgit v1.2.3 From eb5279fb7e41804ecc15ed3cf716a9e2b419af57 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 22 Mar 2022 14:39:28 -0700 Subject: filemap: remove find_get_pages() It's unused now. Remove it and clean up the relevant comment. Link: https://lkml.kernel.org/r/20220208134149.47299-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: David Howells Cc: William Kucharski Cc: Vlastimil Babka Cc: Kirill A. Shutemov Cc: Johannes Weiner Cc: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 270bf5136c34..dc31eb981ea2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -594,13 +594,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); -static inline unsigned find_get_pages(struct address_space *mapping, - pgoff_t *start, unsigned int nr_pages, - struct page **pages) -{ - return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages, - pages); -} unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, -- cgit v1.2.3 From ad6c441266dcd50be080a47e1178a1b15369923c Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 22 Mar 2022 14:39:43 -0700 Subject: mm/gup: remove unused pin_user_pages_locked() This routine was used for a short while, but then the calling code was refactored and the only caller was removed. Link: https://lkml.kernel.org/r/20220204020010.68930-4-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Claudio Imbrenda Cc: Alex Williamson Cc: Andrea Arcangeli Cc: Jason Gunthorpe Cc: Kirill A. 
Shutemov Cc: Lukas Bulwahn Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5744a3fc4716..d4a2b40066fa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1918,8 +1918,6 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); -long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, -- cgit v1.2.3 From 73fd16d8080f7b1537ba7aa29917f64d6fffa664 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 22 Mar 2022 14:39:50 -0700 Subject: mm/gup: remove unused get_user_pages_locked() Now that the last caller of get_user_pages_locked() is gone, remove it. Link: https://lkml.kernel.org/r/20220204020010.68930-6-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Jan Kara Reviewed-by: Jason Gunthorpe Reviewed-by: Claudio Imbrenda Reviewed-by: Christoph Hellwig Cc: Alex Williamson Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Kirill A. Shutemov Cc: Lukas Bulwahn Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index d4a2b40066fa..c02a8cc16e4f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1916,8 +1916,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, -- cgit v1.2.3 From f7cd16a55837f37b4c3835a2c646023e4d0f0e04 Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Tue, 22 Mar 2022 14:39:55 -0700 Subject: tmpfs: support for file creation time Various filesystems (including ext4) now support file creation time. This adds such support for tmpfs-based filesystems. Note that using shmem_getattr() on other file types than regular requires that shmem_is_huge() check type, to stop incorrect HPAGE_PMD_SIZE blksize. 
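For illustration, a hedged sketch of the idea rather than the exact patch: the creation time is recorded in the new shmem_inode_info field when the inode is set up (e.g. from the initial mtime), and reported as the statx birth time from the getattr path. The helper name below is made up; shmem_inode_info, struct kstat and STATX_BTIME are existing kernel names.

	/* hypothetical helper; "info" is the inode's shmem_inode_info */
	static void shmem_fill_btime(struct shmem_inode_info *info,
				     u32 request_mask, struct kstat *stat)
	{
		/* advertise that a birth time is available */
		stat->result_mask |= STATX_BTIME;
		if (request_mask & STATX_BTIME)
			stat->btime = info->i_crtime;
	}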
[hughd@google.com: three tweaks to creation time patch] Link: https://lkml.kernel.org/r/b954973a-b8d1-cab8-63bd-6ea8063de3@google.com Link: https://lkml.kernel.org/r/20220314211150.GA123458@xavier-xps Link: https://lkml.kernel.org/r/b954973a-b8d1-cab8-63bd-6ea8063de3@google.com Link: https://lkml.kernel.org/r/20220211213628.GA1919658@xavier-xps Signed-off-by: Xavier Roche Signed-off-by: Hugh Dickins Tested-by: Jean Delvare Tested-by: Sylvain Bellone Reported-by: Xavier Grand Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index e65b80ed09e7..ab51d3cd39bd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -24,6 +24,7 @@ struct shmem_inode_info { struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ atomic_t stop_eviction; /* hold when working on inode */ + struct timespec64 i_crtime; /* file creation time */ struct inode vfs_inode; }; -- cgit v1.2.3 From a8c49af3be5f0b4e105ef678bcf14ef102c270be Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 22 Mar 2022 14:40:10 -0700 Subject: memcg: add per-memcg total kernel memory stat Currently memcg stats show several types of kernel memory: kernel stack, page tables, sock, vmalloc, and slab. However, there are other allocations with __GFP_ACCOUNT (or supersets such as GFP_KERNEL_ACCOUNT) that are not accounted in any of those stats, a few examples are: - various kvm allocations (e.g. allocated pages to create vcpus) - io_uring - tmp_page in pipes during pipe_write() - bpf ringbuffers - unix sockets Keeping track of the total kernel memory is essential for the ease of migration from cgroup v1 to v2 as there are large discrepancies between v1's kmem.usage_in_bytes and the sum of the available kernel memory stats in v2. Adding separate memcg stats for all __GFP_ACCOUNT kernel allocations is an impractical maintenance burden as there a lot of those all over the kernel code, with more use cases likely to show up in the future. Therefore, add a "kernel" memcg stat that is analogous to kmem page counter, with added benefits such as using rstat infrastructure which aggregates stats more efficiently. Additionally, this provides a lighter alternative in case the legacy kmem is deprecated in the future [yosryahmed@google.com: v2] Link: https://lkml.kernel.org/r/20220203193856.972500-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20220201200823.3283171-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0abbd685703b..8612d7dd0859 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,6 +34,7 @@ enum memcg_stat_item { MEMCG_SOCK, MEMCG_PERCPU_B, MEMCG_VMALLOC, + MEMCG_KMEM, MEMCG_NR_STAT, }; -- cgit v1.2.3 From 486bc7060cb510fa60cb85a013d5ed51ce0fe456 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 22 Mar 2022 14:40:16 -0700 Subject: mm/memcg: retrieve parent memcg from css.parent The parent we get from page_counter is correct, while this is two different hierarchy. Let's retrieve the parent memcg from css.parent just like parent_cs(), blkcg_parent(), etc. 
Link: https://lkml.kernel.org/r/20220201004643.8391-2-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Muchun Song Acked-by: Michal Hocko Reviewed-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Roman Gushchin Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Yang Shi Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8612d7dd0859..ef4b445392a9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -842,9 +842,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) */ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { - if (!memcg->memory.parent) - return NULL; - return mem_cgroup_from_counter(memcg->memory.parent, memory); + return mem_cgroup_from_css(memcg->css.parent); } static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, -- cgit v1.2.3 From 6a6b7b77cc0fdc13f50c66c219c8c05500a8dfce Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:40:53 -0700 Subject: mm: list_lru: transpose the array of per-node per-memcg lru lists Patch series "Optimize list lru memory consumption", v6. In our server, we found a suspected memory leak problem. The kmalloc-32 consumes more than 6GB of memory. Other kmem_caches consume less than 2GB memory. After our in-depth analysis, the memory consumption of kmalloc-32 slab cache is the cause of list_lru_one allocation. crash> p memcg_nr_cache_ids memcg_nr_cache_ids = $2 = 24574 memcg_nr_cache_ids is very large and memory consumption of each list_lru can be calculated with the following formula. num_numa_node * memcg_nr_cache_ids * 32 (kmalloc-32) There are 4 numa nodes in our system, so each list_lru consumes ~3MB. crash> list super_blocks | wc -l 952 Every mount will register 2 list lrus, one is for inode, another is for dentry. There are 952 super_blocks. So the total memory is 952 * 2 * 3 MB (~5.6GB). But now the number of memory cgroups is less than 500. So I guess more than 12286 memory cgroups have been created on this machine (I do not know why there are so many cgroups, it may be a user's bug or the user really want to do that). Because memcg_nr_cache_ids has not been reduced to a suitable value. It leads to waste a lot of memory. If we want to reduce memcg_nr_cache_ids, we have to *reboot* the server. This is not what we want. In order to reduce memcg_nr_cache_ids, I had posted a patchset [1] to do this. But this did not fundamentally solve the problem. We currently allocate scope for every memcg to be able to tracked on every superblock instantiated in the system, regardless of whether that superblock is even accessible to that memcg. These huge memcg counts come from container hosts where memcgs are confined to just a small subset of the total number of superblocks that instantiated at any given point in time. For these systems with huge container counts, list_lru does not need the capability of tracking every memcg on every superblock. What it comes down to is that the list_lru is only needed for a given memcg if that memcg is instatiating and freeing objects on a given list_lru. As Dave said, "Which makes me think we should be moving more towards 'add the memcg to the list_lru at the first insert' model rather than 'instantiate all at memcg init time just in case'." 
This patchset aims to optimize the list lru memory consumption from different aspects. I had done a easy test to show the optimization. I create 10k memory cgroups and mount 10k filesystems in the systems. We use free command to show how many memory does the systems comsumes after this operation (There are 2 numa nodes in the system). +-----------------------+------------------------+ | condition | memory consumption | +-----------------------+------------------------+ | without this patchset | 24464 MB | +-----------------------+------------------------+ | after patch 1 | 21957 MB | <--------+ +-----------------------+------------------------+ | | after patch 10 | 6895 MB | | +-----------------------+------------------------+ | | after patch 12 | 4367 MB | | +-----------------------+------------------------+ | | The more the number of nodes, the more obvious the effect---+ BTW, there was a recent discussion [2] on the same issue. [1] https://lore.kernel.org/all/20210428094949.43579-1-songmuchun@bytedance.com/ [2] https://lore.kernel.org/all/20210405054848.GA1077931@in.ibm.com/ This series not only optimizes the memory usage of list_lru but also simplifies the code. This patch (of 16): The current scheme of maintaining per-node per-memcg lru lists looks like: struct list_lru { struct list_lru_node *node; (for each node) struct list_lru_memcg *memcg_lrus; struct list_lru_one *lru[]; (for each memcg) } By effectively transposing the two-dimension array of list_lru_one's structures (per-node per-memcg => per-memcg per-node) it's possible to save some memory and simplify alloc/dealloc paths. The new scheme looks like: struct list_lru { struct list_lru_memcg *mlrus; struct list_lru_per_memcg *mlru[]; (for each memcg) struct list_lru_one node[0]; (for each node) } Memory savings are coming from not only 'struct rcu_head' but also some pointer arrays used to store the pointer to 'struct list_lru_one'. The array is per node and its size is 8 (a pointer) * num_memcgs. So the total size of the arrays is 8 * num_nodes * memcg_nr_cache_ids. After this patch, the size becomes 8 * memcg_nr_cache_ids. 
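Plugging in the numbers quoted above, with 4 NUMA nodes and memcg_nr_cache_ids = 24574, the per-node pointer arrays alone cost 8 * 4 * 24574 bytes (~768 KiB) for every list_lru, while the transposed layout needs only 8 * 24574 bytes (~192 KiB) per list_lru, before counting the removed rcu_head instances.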
Link: https://lkml.kernel.org/r/20220228122126.37293-1-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20220228122126.37293-2-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Vladimir Davydov Cc: Shakeel Butt Cc: Yang Shi Cc: Alex Shi Cc: Wei Yang Cc: Dave Chinner Cc: Trond Myklebust Cc: Anna Schumaker Cc: Jaegeuk Kim Cc: Chao Yu Cc: Kari Argillander Cc: Vlastimil Babka Cc: Qi Zheng Cc: Xiongchun Duan Cc: Fam Zheng Cc: Roman Gushchin Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 1b5fceb565df..729a27b6ff53 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -31,10 +31,15 @@ struct list_lru_one { long nr_items; }; +struct list_lru_per_memcg { + /* array of per cgroup per node lists, indexed by node id */ + struct list_lru_one node[0]; +}; + struct list_lru_memcg { - struct rcu_head rcu; + struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_one *lru[]; + struct list_lru_per_memcg *mlru[]; }; struct list_lru_node { @@ -42,11 +47,7 @@ struct list_lru_node { spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; -#ifdef CONFIG_MEMCG_KMEM - /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ - struct list_lru_memcg __rcu *memcg_lrus; -#endif - long nr_items; + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { @@ -55,6 +56,8 @@ struct list_lru { struct list_head list; int shrinker_id; bool memcg_aware; + /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ + struct list_lru_memcg __rcu *mlrus; #endif }; -- cgit v1.2.3 From 88f2ef73fd66491a2f9a82373d22ca6540f23c62 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:40:56 -0700 Subject: mm: introduce kmem_cache_alloc_lru We currently allocate scope for every memcg to be able to tracked on every superblock instantiated in the system, regardless of whether that superblock is even accessible to that memcg. These huge memcg counts come from container hosts where memcgs are confined to just a small subset of the total number of superblocks that instantiated at any given point in time. For these systems with huge container counts, list_lru does not need the capability of tracking every memcg on every superblock. What it comes down to is that adding the memcg to the list_lru at the first insert. So introduce kmem_cache_alloc_lru to allocate objects and its list_lru. In the later patch, we will convert all inode and dentry allocation from kmem_cache_alloc to kmem_cache_alloc_lru. 
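As a usage illustration, a minimal hedged sketch of a caller; foo_cachep and foo_lru are hypothetical names, only kmem_cache_alloc_lru() itself comes from this patch:

	/* allocate an object that will later be added to foo_lru,
	 * a memcg-aware list_lru, so its per-memcg lists can be set
	 * up on first use */
	struct foo *obj = kmem_cache_alloc_lru(foo_cachep, &foo_lru, GFP_KERNEL);

	if (!obj)
		return -ENOMEM;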
Link: https://lkml.kernel.org/r/20220228122126.37293-3-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 4 ++++ include/linux/memcontrol.h | 14 ++++++++++++++ include/linux/slab.h | 3 +++ 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 729a27b6ff53..ab912c49334f 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -56,6 +56,8 @@ struct list_lru { struct list_head list; int shrinker_id; bool memcg_aware; + /* protects ->mlrus->mlru[i] */ + spinlock_t lock; /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg __rcu *mlrus; #endif @@ -72,6 +74,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) +int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, + gfp_t gfp); int memcg_update_all_list_lrus(int num_memcgs); void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ef4b445392a9..b636732f0c14 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -524,6 +524,20 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); +retry: + memcg = obj_cgroup_memcg(objcg); + if (unlikely(!css_tryget(&memcg->css))) + goto retry; + rcu_read_unlock(); + + return memcg; +} + #ifdef CONFIG_MEMCG_KMEM /* * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. diff --git a/include/linux/slab.h b/include/linux/slab.h index 5b6193fd8bd9..e6addaf91afd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,6 +135,7 @@ #include +struct list_lru; struct mem_cgroup; /* * struct kmem_cache related prototypes @@ -416,6 +417,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size, void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; +void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); /* -- cgit v1.2.3 From 8b9f3ac5b01db85c6cf74c2c3a71280cc3045c9c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:00 -0700 Subject: fs: introduce alloc_inode_sb() to allocate filesystems specific inode The allocated inode cache is supposed to be added to its memcg list_lru which should be allocated as well in advance. That can be done by kmem_cache_alloc_lru() which allocates object and list_lru. The file systems is main user of it. So introduce alloc_inode_sb() to allocate file system specific inodes and set up the inode reclaim context properly. 
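As an illustration of the conversion described here, a hedged sketch of a filesystem's ->alloc_inode; the foo_* names are hypothetical:

	static struct inode *foo_alloc_inode(struct super_block *sb)
	{
		struct foo_inode_info *fi;

		/* was: fi = kmem_cache_alloc(foo_inode_cachep, GFP_KERNEL); */
		fi = alloc_inode_sb(sb, foo_inode_cachep, GFP_KERNEL);
		if (!fi)
			return NULL;
		return &fi->vfs_inode;
	}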
The file system is supposed to use alloc_inode_sb() to allocate inodes. In later patches, we will convert all users to the new API. Link: https://lkml.kernel.org/r/20220228122126.37293-4-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Roman Gushchin Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ca9445f6cf3d..58a73e59e4c0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -3114,6 +3115,16 @@ extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); +/* + * This must be used for allocating filesystems specific inodes to set + * up the inode reclaim context correctly. + */ +static inline void * +alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) +{ + return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); +} + extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { -- cgit v1.2.3 From 9bbdc0f324097f72b2354c2f8be4cdffd32679b6 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:12 -0700 Subject: xarray: use kmem_cache_alloc_lru to allocate xa_node The workingset will add the xa_node to the shadow_nodes list. So the allocation of xa_node should be done by kmem_cache_alloc_lru(). Using xas_set_lru() to pass the list_lru which we want to insert xa_node into to set up the xa_node reclaim context correctly. 
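Roughly, for a regular (non-DAX, non-shmem) mapping the updated mapping_set_update() shown below boils down to the following pairing; this is a hedged sketch of the expansion, not new code:

	XA_STATE(xas, &mapping->i_pages, index);

	xas_set_update(&xas, workingset_update_node);	/* shadow-node accounting */
	xas_set_lru(&xas, &shadow_nodes);		/* reclaim context for xa_node */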
Link: https://lkml.kernel.org/r/20220228122126.37293-9-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 ++++- include/linux/xarray.h | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d..3db431276d82 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -334,9 +334,12 @@ void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); +extern struct list_lru shadow_nodes; #define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ + if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ xas_set_update(xas, workingset_update_node); \ + xas_set_lru(xas, &shadow_nodes); \ + } \ } while (0) /* linux/mm/page_alloc.c */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index d6d5da6ed735..bb52b786be1b 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1317,6 +1317,7 @@ struct xa_state { struct xa_node *xa_node; struct xa_node *xa_alloc; xa_update_node_t xa_update; + struct list_lru *xa_lru; }; /* @@ -1336,7 +1337,8 @@ struct xa_state { .xa_pad = 0, \ .xa_node = XAS_RESTART, \ .xa_alloc = NULL, \ - .xa_update = NULL \ + .xa_update = NULL, \ + .xa_lru = NULL, \ } /** @@ -1631,6 +1633,11 @@ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) xas->xa_update = update; } +static inline void xas_set_lru(struct xa_state *xas, struct list_lru *lru) +{ + xas->xa_lru = lru; +} + /** * xas_next_entry() - Advance iterator to next present entry. * @xas: XArray operation state. -- cgit v1.2.3 From 5abc1e37afa0335c52608d640fd30910b2eeda21 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:19 -0700 Subject: mm: list_lru: allocate list_lru_one only when needed In our server, we found a suspected memory leak problem. The kmalloc-32 consumes more than 6GB of memory. Other kmem_caches consume less than 2GB memory. After our in-depth analysis, the memory consumption of kmalloc-32 slab cache is the cause of list_lru_one allocation. crash> p memcg_nr_cache_ids memcg_nr_cache_ids = $2 = 24574 memcg_nr_cache_ids is very large and memory consumption of each list_lru can be calculated with the following formula. num_numa_node * memcg_nr_cache_ids * 32 (kmalloc-32) There are 4 numa nodes in our system, so each list_lru consumes ~3MB. crash> list super_blocks | wc -l 952 Every mount will register 2 list lrus, one is for inode, another is for dentry. There are 952 super_blocks. So the total memory is 952 * 2 * 3 MB (~5.6GB). But the number of memory cgroup is less than 500. So I guess more than 12286 containers have been deployed on this machine (I do not know why there are so many containers, it may be a user's bug or the user really want to do that). And memcg_nr_cache_ids has not been reduced to a suitable value. This can waste a lot of memory. 
Now the infrastructure for dynamic list_lru_one allocation is ready, so remove statically allocated memory code to save memory. Link: https://lkml.kernel.org/r/20220228122126.37293-11-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index ab912c49334f..c36db6dc2a65 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,14 +32,15 @@ struct list_lru_one { }; struct list_lru_per_memcg { + struct rcu_head rcu; /* array of per cgroup per node lists, indexed by node id */ - struct list_lru_one node[0]; + struct list_lru_one node[]; }; struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_per_memcg *mlru[]; + struct list_lru_per_memcg __rcu *mlru[]; }; struct list_lru_node { @@ -77,7 +78,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); int memcg_update_all_list_lrus(int num_memcgs); -void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg); +void memcg_drain_all_list_lrus(struct mem_cgroup *src, struct mem_cgroup *dst); /** * list_lru_add: add an element to the lru list's tail -- cgit v1.2.3 From 1f391eb270791359ee79031945dbe3afeaec6ce3 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:22 -0700 Subject: mm: list_lru: rename memcg_drain_all_list_lrus to memcg_reparent_list_lrus The purpose of the memcg_drain_all_list_lrus() is list_lrus reparenting. It is very similar to memcg_reparent_objcgs(). Rename it to memcg_reparent_list_lrus() so that the name can be more consistent with memcg_reparent_objcgs(). 
Link: https://lkml.kernel.org/r/20220228122126.37293-12-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index c36db6dc2a65..4b00fd8cb373 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -78,7 +78,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); int memcg_update_all_list_lrus(int num_memcgs); -void memcg_drain_all_list_lrus(struct mem_cgroup *src, struct mem_cgroup *dst); +void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent); /** * list_lru_add: add an element to the lru list's tail -- cgit v1.2.3 From bbca91cca9a902de2e9907370e9c1e0a3d1aab0f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:25 -0700 Subject: mm: list_lru: replace linear array with xarray If we run 10k containers in the system, the size of the list_lru_memcg->lrus can be ~96KB per list_lru. When we decrease the number containers, the size of the array will not be shrinked. It is not scalable. The xarray is a good choice for this case. We can save a lot of memory when there are tens of thousands continers in the system. If we use xarray, we also can remove the logic code of resizing array, which can simplify the code. 
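For illustration, a hedged and simplified sketch of what the per-memcg lookup can look like once the pointer array is replaced by an xarray indexed by the memcg's kmem id; the helper shape below is illustrative and omits the memcg-aware checks:

	static inline struct list_lru_one *
	list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
	{
		/* per-memcg lists are found by xarray index, not by
		 * dereferencing a resizable pointer array */
		struct list_lru_per_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}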
[akpm@linux-foundation.org: remove unused local] Link: https://lkml.kernel.org/r/20220228122126.37293-13-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 13 ++----------- include/linux/memcontrol.h | 23 ----------------------- 2 files changed, 2 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 4b00fd8cb373..572c263561ac 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -11,6 +11,7 @@ #include #include #include +#include struct mem_cgroup; @@ -37,12 +38,6 @@ struct list_lru_per_memcg { struct list_lru_one node[]; }; -struct list_lru_memcg { - struct rcu_head rcu; - /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_per_memcg __rcu *mlru[]; -}; - struct list_lru_node { /* protects all lists on the node, including per cgroup */ spinlock_t lock; @@ -57,10 +52,7 @@ struct list_lru { struct list_head list; int shrinker_id; bool memcg_aware; - /* protects ->mlrus->mlru[i] */ - spinlock_t lock; - /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ - struct list_lru_memcg __rcu *mlrus; + struct xarray xa; #endif }; @@ -77,7 +69,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); -int memcg_update_all_list_lrus(int num_memcgs); void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent); /** diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b636732f0c14..066b7a3b8a9e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1685,18 +1685,6 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); extern struct static_key_false memcg_kmem_enabled_key; -extern int memcg_nr_cache_ids; -void memcg_get_cache_ids(void); -void memcg_put_cache_ids(void); - -/* - * Helper macro to loop through all memcg-specific caches. Callers must still - * check if the cache is valid (it is either valid or NULL). - * the slab_mutex must be held when looping through those caches - */ -#define for_each_memcg_cache_index(_idx) \ - for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++) - static inline bool memcg_kmem_enabled(void) { return static_branch_likely(&memcg_kmem_enabled_key); @@ -1753,9 +1741,6 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -#define for_each_memcg_cache_index(_idx) \ - for (; NULL; ) - static inline bool memcg_kmem_enabled(void) { return false; @@ -1766,14 +1751,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline void memcg_get_cache_ids(void) -{ -} - -static inline void memcg_put_cache_ids(void) -{ -} - static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) { return NULL; -- cgit v1.2.3 From d70110704d2d52a65a9fe43be409d8c3acce79fe Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:35 -0700 Subject: mm: list_lru: rename list_lru_per_memcg to list_lru_memcg The name of list_lru_memcg was occupied before and became free since last commit. 
Rename list_lru_per_memcg to list_lru_memcg since the name is brief. Link: https://lkml.kernel.org/r/20220228122126.37293-16-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 572c263561ac..b35968ee9fb5 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,7 +32,7 @@ struct list_lru_one { long nr_items; }; -struct list_lru_per_memcg { +struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup per node lists, indexed by node id */ struct list_lru_one node[]; -- cgit v1.2.3 From 7c52f65de40ff0e44f821559eb61931d368a4c48 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:38 -0700 Subject: mm: memcontrol: rename memcg_cache_id to memcg_kmem_id The memcg_cache_id() introduced by commit 2633d7a02823 ("slab/slub: consider a memcg parameter in kmem_create_cache") is used to index in the kmem_cache->memcg_params->memcg_caches array. Since kmem_cache->memcg_params.memcg_caches has been removed by commit 9855609bde03 ("mm: memcg/slab: use a single set of kmem_caches for all accounted allocations"). So the name does not need to reflect cache related. Just rename it to memcg_kmem_id. And it can reflect kmem related. Link: https://lkml.kernel.org/r/20220228122126.37293-17-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Johannes Weiner Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 066b7a3b8a9e..a68dce3873fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1708,7 +1708,7 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. */ -static inline int memcg_cache_id(struct mem_cgroup *memcg) +static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return memcg ? memcg->kmemcg_id : -1; } @@ -1746,7 +1746,7 @@ static inline bool memcg_kmem_enabled(void) return false; } -static inline int memcg_cache_id(struct mem_cgroup *memcg) +static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return -1; } -- cgit v1.2.3 From 16785bd7743104d57257a455001172b75afa7614 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 22 Mar 2022 14:41:47 -0700 Subject: mm: merge pte_mkhuge() call into arch_make_huge_pte() Each call into pte_mkhuge() is invariably followed by arch_make_huge_pte(). Instead arch_make_huge_pte() can accommodate pte_mkhuge() at the beginning. 
This updates generic fallback stub for arch_make_huge_pte() and available platforms definitions. This makes huge pte creation much cleaner and easier to follow. Link: https://lkml.kernel.org/r/1643860669-26307-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Christophe Leroy Acked-by: Mike Kravetz Acked-by: Catalin Marinas Cc: Will Deacon Cc: Michael Ellerman Cc: Paul Mackerras Cc: "David S. Miller" Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d1897a69c540..52c462390aee 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -754,7 +754,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) { - return entry; + return pte_mkhuge(entry); } #endif -- cgit v1.2.3 From ff11a7ce1f0f8c1e7870de26860024b4ddbf5755 Mon Sep 17 00:00:00 2001 From: Bang Li Date: Tue, 22 Mar 2022 14:43:02 -0700 Subject: mm/vmalloc: fix comments about vmap_area struct The vmap_area_root should be in the "busy" tree and the free_vmap_area_root should be in the "free" tree. Link: https://lkml.kernel.org/r/20220305011510.33596-1-libang.linuxer@gmail.com Fixes: 688fcbfc06e4 ("mm/vmalloc: modify struct vmap_area to reduce its size") Signed-off-by: Bang Li Reviewed-by: Uladzislau Rezki (Sony) Cc: Pengfei Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 880227b9f044..05065915edd7 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,8 +80,8 @@ struct vmap_area { /* * The following two variables can be packed, because * a vmap_area object can be either: - * 1) in "free" tree (root is vmap_area_root) - * 2) or "busy" tree (root is free_vmap_area_root) + * 1) in "free" tree (root is free_vmap_area_root) + * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ -- cgit v1.2.3 From 1dd214b8f21ca46d5431be9b2db8513c59e07a26 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 22 Mar 2022 14:43:05 -0700 Subject: mm: page_alloc: avoid merging non-fallbackable pageblocks with others This is done in addition to MIGRATE_ISOLATE pageblock merge avoidance. It prepares for the upcoming removal of the MAX_ORDER-1 alignment requirement for CMA and alloc_contig_range(). MIGRATE_HIGHATOMIC should not merge with other migratetypes like MIGRATE_ISOLATE and MIGRARTE_CMA[1], so this commit prevents that too. Remove MIGRATE_CMA and MIGRATE_ISOLATE from fallbacks list, since they are never used. 
[1] https://lore.kernel.org/linux-mm/20211130100853.GP3366@techsingularity.net/ Link: https://lkml.kernel.org/r/20220124175957.1261961-1-zi.yan@sent.com Signed-off-by: Zi Yan Acked-by: Mel Gorman Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Oscar Salvador Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aed44e9b5d89..71b77aab748d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -83,6 +83,17 @@ static inline bool is_migrate_movable(int mt) return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; } +/* + * Check whether a migratetype can be merged with another migratetype. + * + * It is only mergeable when it can fall back to other migratetypes for + * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. + */ +static inline bool migratetype_is_mergeable(int mt) +{ + return mt < MIGRATE_PCPTYPES; +} + #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) -- cgit v1.2.3 From 7f37e49cbd60ef71b82d25cd55b039a65d06387c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 22 Mar 2022 14:43:11 -0700 Subject: mm/mmzone.h: remove unused macros Remove pgdat_page_nr, nid_page_nr and NODE_MEM_MAP. They are unused now. Link: https://lkml.kernel.org/r/20220127093210.62293-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 71b77aab748d..c9e6a50109b9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -931,12 +931,6 @@ typedef struct pglist_data { #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) -#ifdef CONFIG_FLATMEM -#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) -#else -#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) -#endif -#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) @@ -1112,7 +1106,6 @@ static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } -#define NODE_MEM_MAP(nid) mem_map #else /* CONFIG_NUMA */ -- cgit v1.2.3 From e16faf26780fc0c8dd693ea9ee8420a7706cb2f5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:43:17 -0700 Subject: cma: factor out minimum alignment requirement Patch series "mm: enforce pageblock_order < MAX_ORDER". Having pageblock_order >= MAX_ORDER seems to be able to happen in corner cases and some parts of the kernel are not prepared for it. For example, Aneesh has shown [1] that such kernels can be compiled on ppc64 with 64k base pages by setting FORCE_MAX_ZONEORDER=8, which will run into a WARN_ON_ONCE(order >= MAX_ORDER) in comapction code right during boot. We can get pageblock_order >= MAX_ORDER when the default hugetlb size is bigger than the maximum allocation granularity of the buddy, in which case we are no longer talking about huge pages but instead gigantic pages. 
Having pageblock_order >= MAX_ORDER can only make alloc_contig_range() of such gigantic pages more likely to succeed. Reliable use of gigantic pages either requires boot time allcoation or CMA, no need to overcomplicate some places in the kernel to optimize for corner cases that are broken in other areas of the kernel. This patch (of 2): Let's enforce pageblock_order < MAX_ORDER and simplify. Especially patch #1 can be regarded a cleanup before: [PATCH v5 0/6] Use pageblock_order for cma and alloc_contig_range alignment. [2] [1] https://lkml.kernel.org/r/87r189a2ks.fsf@linux.ibm.com [2] https://lkml.kernel.org/r/20220211164135.1803616-1-zi.yan@sent.com Link: https://lkml.kernel.org/r/20220214174132.219303-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Acked-by: Rob Herring Cc: Aneesh Kumar K.V Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Frank Rowand Cc: Michael S. Tsirkin Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Cc: Minchan Kim Cc: Vlastimil Babka Cc: John Garry via iommu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index bd801023504b..75fe188ec4a1 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -20,6 +20,15 @@ #define CMA_MAX_NAME 64 +/* + * TODO: once the buddy -- especially pageblock merging and alloc_contig_range() + * -- can deal with only some pageblocks of a higher-order page being + * MIGRATE_CMA, we can use pageblock_nr_pages. + */ +#define CMA_MIN_ALIGNMENT_PAGES max_t(phys_addr_t, MAX_ORDER_NR_PAGES, \ + pageblock_nr_pages) +#define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES) + struct cma; extern unsigned long totalcma_pages; -- cgit v1.2.3 From b3d40a2b6d10c9d0424d2b398bf962fb6adad87e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:43:20 -0700 Subject: mm: enforce pageblock_order < MAX_ORDER Some places in the kernel don't really expect pageblock_order >= MAX_ORDER, and it looks like this is only possible in corner cases: 1) CONFIG_DEFERRED_STRUCT_PAGE_INIT we'll end up freeing pageblock_order pages via __free_pages_core(), which cannot possibly work. 2) find_zone_movable_pfns_for_nodes() will roundup the ZONE_MOVABLE start PFN to MAX_ORDER_NR_PAGES. Consequently with a bigger pageblock_order, we could have a single pageblock partially managed by two zones. 3) compaction code runs into __fragmentation_index() with order >= MAX_ORDER, when checking WARN_ON_ONCE(order >= MAX_ORDER). [1] 4) mm/page_reporting.c won't be reporting any pages with default page_reporting_order == pageblock_order, as we'll be skipping the reporting loop inside page_reporting_process_zone(). 5) __rmqueue_fallback() will never be able to steal with ALLOC_NOFRAGMENT. pageblock_order >= MAX_ORDER is weird either way: it's a pure optimization for making alloc_contig_range(), as used for allcoation of gigantic pages, a little more reliable to succeed. However, if there is demand for somewhat reliable allocation of gigantic pages, affected setups should be using CMA or boottime allocations instead. So let's make sure that pageblock_order < MAX_ORDER and simplify. 
[1] https://lkml.kernel.org/r/87r189a2ks.fsf@linux.ibm.com Link: https://lkml.kernel.org/r/20220214174132.219303-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Christoph Hellwig Cc: Frank Rowand Cc: John Garry via iommu Cc: Marek Szyprowski Cc: Michael Ellerman Cc: Michael S. Tsirkin Cc: Minchan Kim Cc: Paul Mackerras Cc: Rob Herring Cc: Robin Murphy Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 3 +-- include/linux/pageblock-flags.h | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index 75fe188ec4a1..b1ba94f1cc9c 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -25,8 +25,7 @@ * -- can deal with only some pageblocks of a higher-order page being * MIGRATE_CMA, we can use pageblock_nr_pages. */ -#define CMA_MIN_ALIGNMENT_PAGES max_t(phys_addr_t, MAX_ORDER_NR_PAGES, \ - pageblock_nr_pages) +#define CMA_MIN_ALIGNMENT_PAGES MAX_ORDER_NR_PAGES #define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES) struct cma; diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 973fd731a520..83c7248053a1 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -37,8 +37,11 @@ extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -/* Huge pages are a constant size */ -#define pageblock_order HUGETLB_PAGE_ORDER +/* + * Huge pages are a constant size, but don't exceed the maximum allocation + * granularity. + */ +#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER - 1) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -- cgit v1.2.3 From 1ca75fa7f19d694c58af681fa023295072b03120 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 22 Mar 2022 14:43:51 -0700 Subject: arch/x86/mm/numa: Do not initialize nodes twice On x86, prior to ("mm: handle uninitialized numa nodes gracecully"), NUMA nodes could be allocated at three different places. - numa_register_memblks - init_cpu_to_node - init_gi_nodes All these calls happen at setup_arch, and have the following order: setup_arch ... x86_numa_init numa_init numa_register_memblks ... init_cpu_to_node init_memory_less_node alloc_node_data free_area_init_memoryless_node init_gi_nodes init_memory_less_node alloc_node_data free_area_init_memoryless_node numa_register_memblks() is only interested in those nodes which have memory, so it skips over any memoryless node it founds. Later on, when we have read ACPI's SRAT table, we call init_cpu_to_node() and init_gi_nodes(), which initialize any memoryless node we might have that have either CPU or Initiator affinity, meaning we allocate pg_data_t struct for them and we mark them as ONLINE. So far so good, but the thing is that after ("mm: handle uninitialized numa nodes gracefully"), we allocate all possible NUMA nodes in free_area_init(), meaning we have a picture like the following: setup_arch x86_numa_init numa_init numa_register_memblks <-- allocate non-memoryless node x86_init.paging.pagetable_init ... 
free_area_init free_area_init_memoryless <-- allocate memoryless node init_cpu_to_node alloc_node_data <-- allocate memoryless node with CPU free_area_init_memoryless_node init_gi_nodes alloc_node_data <-- allocate memoryless node with Initiator free_area_init_memoryless_node free_area_init() already allocates all possible NUMA nodes, but init_cpu_to_node() and init_gi_nodes() are clueless about that, so they go ahead and allocate a new pg_data_t struct without checking anything, meaning we end up allocating twice. It should be mad clear that this only happens in the case where memoryless NUMA node happens to have a CPU/Initiator affinity. So get rid of init_memory_less_node() and just set the node online. Note that setting the node online is needed, otherwise we choke down the chain when bringup_nonboot_cpus() ends up calling __try_online_node()->register_one_node()->... and we blow up in bus_add_device(). As can be seen here: BUG: kernel NULL pointer dereference, address: 0000000000000060 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4-1-default+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/4 RIP: 0010:bus_add_device+0x5a/0x140 Code: 8b 74 24 20 48 89 df e8 84 96 ff ff 85 c0 89 c5 75 38 48 8b 53 50 48 85 d2 0f 84 bb 00 004 RSP: 0000:ffffc9000022bd10 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888100987400 RCX: ffff8881003e4e19 RDX: ffff8881009a5e00 RSI: ffff888100987400 RDI: ffff888100987400 RBP: 0000000000000000 R08: ffff8881003e4e18 R09: ffff8881003e4c98 R10: 0000000000000000 R11: ffff888100402bc0 R12: ffffffff822ceba0 R13: 0000000000000000 R14: ffff888100987400 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88853fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000060 CR3: 000000000200a001 CR4: 00000000001706b0 Call Trace: device_add+0x4c0/0x910 __register_one_node+0x97/0x2d0 __try_online_node+0x85/0xc0 try_online_node+0x25/0x40 cpu_up+0x4f/0x100 bringup_nonboot_cpus+0x4f/0x60 smp_init+0x26/0x79 kernel_init_freeable+0x130/0x2f1 kernel_init+0x17/0x150 ret_from_fork+0x22/0x30 The reason is simple, by the time bringup_nonboot_cpus() gets called, we did not register the node_subsys bus yet, so we crash when bus_add_device() tries to dereference bus()->p. The following shows the order of the calls: kernel_init_freeable smp_init bringup_nonboot_cpus ... bus_add_device() <- we did not register node_subsys yet do_basic_setup do_initcalls postcore_initcall(register_node_type); register_node_type subsys_system_register subsys_register bus_register <- register node_subsys bus Why setting the node online saves us then? Well, simply because __try_online_node() backs off when the node is online, meaning we do not end up calling register_one_node() in the first place. This is subtle, broken and deserves a deep analysis and thought about how to put this into shape, but for now let us have this easy fix for the leaking memory issue. 
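The resulting change can be pictured with the hedged sketch below: because free_area_init() already allocated the pg_data_t for every possible node, init_cpu_to_node()/init_gi_nodes() only need to mark a memoryless node online instead of allocating it a second time.

	/* instead of init_memory_less_node(nid) / alloc_node_data(nid) */
	if (!node_online(nid))
		node_set_online(nid);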
[osalvador@suse.de: add comments] Link: https://lkml.kernel.org/r/20220221142649.3457-1-osalvador@suse.de Link: https://lkml.kernel.org/r/20220218224302.5282-2-osalvador@suse.de Fixes: da4490c958ad ("mm: handle uninitialized numa nodes gracefully") Signed-off-by: Oscar Salvador Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Rafael Aquini Cc: Dave Hansen Cc: Wei Yang Cc: Dennis Zhou Cc: Alexey Makhalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c02a8cc16e4f..d0978235775f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2449,7 +2449,6 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) } extern void __init pagecache_init(void); -extern void __init free_area_init_memoryless_node(int nid); extern void free_initmem(void); /* -- cgit v1.2.3 From 888af2701db79b9b27c7e37f9ede528a5ca53b76 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 22 Mar 2022 14:44:44 -0700 Subject: mm/memory-failure.c: fix race with changing page compound again Patch series "A few fixup patches for memory failure", v2. This series contains a few patches to fix the race with changing page compound page, make non-LRU movable pages unhandlable and so on. More details can be found in the respective changelogs. There is a race window where we got the compound_head, the hugetlb page could be freed to buddy, or even changed to another compound page just before we try to get hwpoison page. Think about the below race window: CPU 1 CPU 2 memory_failure_hugetlb struct page *head = compound_head(p); hugetlb page might be freed to buddy, or even changed to another compound page. get_hwpoison_page -- page is not what we want now... If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is introduced to record this event. [akpm@linux-foundation.org: s@/**@/*@, per Naoya Horiguchi] Link: https://lkml.kernel.org/r/20220312074613.4798-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20220312074613.4798-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Naoya Horiguchi Cc: Tony Luck Cc: Borislav Petkov Cc: Mike Kravetz Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + include/ras/ras_event.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index d0978235775f..45a449e8c209 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3239,6 +3239,7 @@ enum mf_action_page_type { MF_MSG_BUDDY, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, + MF_MSG_DIFFERENT_PAGE_SIZE, MF_MSG_UNKNOWN, }; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index d0337a41141c..1e694fd239b9 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_BUDDY, "free buddy page" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ + EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) /* -- cgit v1.2.3 From 1e7a8181640a620300e98e22223ca3445b349840 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 22 Mar 2022 14:44:53 -0700 Subject: mm, fault-injection: declare should_fail_alloc_page() The mm/ directory can almost fully be built with W=1, which would help in local development. One remaining issue is missing prototype for should_fail_alloc_page(). Thus add it next to the should_failslab() prototype. 
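For reference, the warning this silences shows up when building just the mm/ directory with extra warnings enabled (standard kbuild invocation, nothing specific to this patch):

    make W=1 mm/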
Note the previous attempt by commit f7173090033c ("mm/page_alloc: make should_fail_alloc_page() static") had to be reverted by commit 54aa386661fe as it caused an unresolved symbol error with CONFIG_DEBUG_INFO_BTF=y Link: https://lkml.kernel.org/r/20220314165724.16071-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Mel Gorman Cc: Matthew Wilcox Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fault-inject.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index e525f6957c49..2d04f6448cde 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -64,6 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, struct kmem_cache; +bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); + int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #ifdef CONFIG_FAILSLAB extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); -- cgit v1.2.3 From e7d324850bfcb30df563d144c0363cc44595277d Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:45:00 -0700 Subject: mm: hugetlb: free the 2nd vmemmap page associated with each HugeTLB page Patch series "Free the 2nd vmemmap page associated with each HugeTLB page", v7. This series can minimize the overhead of struct page for 2MB HugeTLB pages significantly. It further reduces the overhead of struct page by 12.5% for a 2MB HugeTLB compared to the previous approach, which means 2GB per 1TB HugeTLB. It is a nice gain. Comments and reviews are welcome. Thanks. The main implementation and details can refer to the commit log of patch 1. In this series, I have changed the following four helpers, the following table shows the impact of the overhead of those helpers. +------------------+-----------------------+ | APIs | head page | tail page | +------------------+-----------+-----------+ | PageHead() | Y | N | +------------------+-----------+-----------+ | PageTail() | Y | N | +------------------+-----------+-----------+ | PageCompound() | N | N | +------------------+-----------+-----------+ | compound_head() | Y | N | +------------------+-----------+-----------+ Y: Overhead is increased. N: Overhead is _NOT_ increased. It shows that the overhead of those helpers on a tail page don't change between "hugetlb_free_vmemmap=on" and "hugetlb_free_vmemmap=off". But the overhead on a head page will be increased when "hugetlb_free_vmemmap=on" (except PageCompound()). So I believe that Matthew Wilcox's folio series will help with this. The users of PageHead() and PageTail() are much less than compound_head() and most users of PageTail() are VM_BUG_ON(), so I have done some tests about the overhead of compound_head() on head pages. I have tested the overhead of calling compound_head() on a head page, which is 2.11ns (Measure the call time of 10 million times compound_head(), and then average). For a head page whose address is not aligned with PAGE_SIZE or a non-compound page, the overhead of compound_head() is 2.54ns which is increased by 20%. For a head page whose address is aligned with PAGE_SIZE, the overhead of compound_head() is 2.97ns which is increased by 40%. Most pages are the former. I do not think the overhead is significant since the overhead of compound_head() itself is low. This patch (of 5): This patch minimizes the overhead of struct page for 2MB HugeTLB pages significantly. 
It further reduces the overhead of struct page by 12.5% for a 2MB HugeTLB compared to the previous approach, which means 2GB per 1TB HugeTLB (2MB type). After the feature of "Free sonme vmemmap pages of HugeTLB page" is enabled, the mapping of the vmemmap addresses associated with a 2MB HugeTLB page becomes the figure below. HugeTLB struct pages(8 pages) page frame(8 pages) +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+---> PG_head | | | 0 | -------------> | 0 | | | +-----------+ +-----------+ | | | 1 | -------------> | 1 | | | +-----------+ +-----------+ | | | 2 | ----------------^ ^ ^ ^ ^ ^ | | +-----------+ | | | | | | | | 3 | ------------------+ | | | | | | +-----------+ | | | | | | | 4 | --------------------+ | | | | 2MB | +-----------+ | | | | | | 5 | ----------------------+ | | | | +-----------+ | | | | | 6 | ------------------------+ | | | +-----------+ | | | | 7 | --------------------------+ | | +-----------+ | | | | | | +-----------+ As we can see, the 2nd vmemmap page frame (indexed by 1) is reused and remaped. However, the 2nd vmemmap page frame is also can be freed to the buddy allocator, then we can change the mapping from the figure above to the figure below. HugeTLB struct pages(8 pages) page frame(8 pages) +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+---> PG_head | | | 0 | -------------> | 0 | | | +-----------+ +-----------+ | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^ | | +-----------+ | | | | | | | | | 2 | -----------------+ | | | | | | | +-----------+ | | | | | | | | 3 | -------------------+ | | | | | | +-----------+ | | | | | | | 4 | ---------------------+ | | | | 2MB | +-----------+ | | | | | | 5 | -----------------------+ | | | | +-----------+ | | | | | 6 | -------------------------+ | | | +-----------+ | | | | 7 | ---------------------------+ | | +-----------+ | | | | | | +-----------+ After we do this, all tail vmemmap pages (1-7) are mapped to the head vmemmap page frame (0). In other words, there are more than one page struct with PG_head associated with each HugeTLB page. We __know__ that there is only one head page struct, the tail page structs with PG_head are fake head page structs. We need an approach to distinguish between those two different types of page structs so that compound_head(), PageHead() and PageTail() can work properly if the parameter is the tail page struct but with PG_head. The following code snippet describes how to distinguish between real and fake head page struct. if (test_bit(PG_head, &page->flags)) { unsigned long head = READ_ONCE(page[1].compound_head); if (head & 1) { if (head == (unsigned long)page + 1) ==> head page struct else ==> tail page struct } else ==> head page struct } We can safely access the field of the @page[1] with PG_head because the @page is a compound page composed with at least two contiguous pages. 
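Condensed into a helper-style sketch (purely illustrative; the real logic is page_fixed_fake_head() in the diff below):

    static bool is_real_head(const struct page *page)
    {
            unsigned long head = READ_ONCE(page[1].compound_head);

            /* for a genuine head, page[1] is its first tail and points back at page */
            return !(head & 1) || head == (unsigned long)page + 1;
    }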
[songmuchun@bytedance.com: restore lost comment changes] Link: https://lkml.kernel.org/r/20211101031651.75851-1-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20211101031651.75851-2-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Barry Song Cc: Mike Kravetz Cc: Oscar Salvador Cc: Michal Hocko Cc: David Hildenbrand Cc: Chen Huang Cc: Bodeddula Balasubramaniam Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Xiongchun Duan Cc: Fam Zheng Cc: Qi Zheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 78 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c3b6e5c8bfd..111e453f23d2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,13 +190,69 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +extern bool hugetlb_free_vmemmap_enabled; + +/* + * If the feature of freeing some vmemmap pages associated with each HugeTLB + * page is enabled, the head vmemmap page frame is reused and all of the tail + * vmemmap addresses map to the head vmemmap page frame (furture details can + * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other + * words, there are more than one page struct with PG_head associated with each + * HugeTLB page. We __know__ that there is only one head page struct, the tail + * page structs with PG_head are fake head page structs. We need an approach + * to distinguish between those two different types of page structs so that + * compound_head() can return the real head page struct when the parameter is + * the tail page struct but with PG_head. + * + * The page_fixed_fake_head() returns the real head page struct if the @page is + * fake page head, otherwise, returns @page which can either be a true page + * head or tail. + */ +static __always_inline const struct page *page_fixed_fake_head(const struct page *page) +{ + if (!hugetlb_free_vmemmap_enabled) + return page; + + /* + * Only addresses aligned with PAGE_SIZE of struct page may be fake head + * struct page. The alignment check aims to avoid access the fields ( + * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) + * cold cacheline in some cases. + */ + if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && + test_bit(PG_head, &page->flags)) { + /* + * We can safely access the field of the @page[1] with PG_head + * because the @page is a compound page composed with at least + * two contiguous pages. 
+ */ + unsigned long head = READ_ONCE(page[1].compound_head); + + if (likely(head & 1)) + return (const struct page *)(head - 1); + } + return page; +} +#else +static inline const struct page *page_fixed_fake_head(const struct page *page) +{ + return page; +} +#endif + +static __always_inline int page_is_fake_head(struct page *page) +{ + return page_fixed_fake_head(page) != page; +} + static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) return head - 1; - return (unsigned long)page; + return (unsigned long)page_fixed_fake_head(page); } #define compound_head(page) ((typeof(page))_compound_head(page)) @@ -231,12 +287,13 @@ static inline unsigned long _compound_head(const struct page *page) static __always_inline int PageTail(struct page *page) { - return READ_ONCE(page->compound_head) & 1; + return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(struct page *page) { - return test_bit(PG_head, &page->flags) || PageTail(page); + return test_bit(PG_head, &page->flags) || + READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l @@ -695,7 +752,20 @@ static inline bool test_set_page_writeback(struct page *page) return set_page_writeback(page); } -__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +static __always_inline bool folio_test_head(struct folio *folio) +{ + return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); +} + +static __always_inline int PageHead(struct page *page) +{ + PF_POISONED_CHECK(page); + return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); +} + +__SETPAGEFLAG(Head, head, PF_ANY) +__CLEARPAGEFLAG(Head, head, PF_ANY) +CLEARPAGEFLAG(Head, head, PF_ANY) /** * folio_test_large() - Does this folio contain more than one page? -- cgit v1.2.3 From a6b40850c442bf996e729e1d441d3dbc37cea171 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:45:03 -0700 Subject: mm: hugetlb: replace hugetlb_free_vmemmap_enabled with a static_key The page_fixed_fake_head() is used throughout memory management and the conditional check requires checking a global variable, although the overhead of this check may be small, it increases when the memory cache comes under pressure. Also, the global variable will not be modified after system boot, so it is very appropriate to use static key machanism. 
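The definition side of the conversion looks roughly like this (a sketch; the DECLARE in the diff below pairs with a DEFINE and the boot-time toggle in mm/hugetlb_vmemmap.c):

    DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
                            hugetlb_free_vmemmap_enabled_key);

    /* flipped once while parsing the "hugetlb_free_vmemmap=" boot parameter */
    static_branch_enable(&hugetlb_free_vmemmap_enabled_key);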
Link: https://lkml.kernel.org/r/20211101031651.75851-3-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Barry Song Cc: Bodeddula Balasubramaniam Cc: Chen Huang Cc: David Hildenbrand Cc: Fam Zheng Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qi Zheng Cc: Xiongchun Duan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 ------ include/linux/page-flags.h | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 52c462390aee..08357b4c7be7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1075,12 +1075,6 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr } #endif /* CONFIG_HUGETLB_PAGE */ -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -extern bool hugetlb_free_vmemmap_enabled; -#else -#define hugetlb_free_vmemmap_enabled false -#endif - static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 111e453f23d2..340cb8156568 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -191,7 +191,14 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -extern bool hugetlb_free_vmemmap_enabled; +DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, + hugetlb_free_vmemmap_enabled_key); + +static __always_inline bool hugetlb_free_vmemmap_enabled(void) +{ + return static_branch_maybe(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, + &hugetlb_free_vmemmap_enabled_key); +} /* * If the feature of freeing some vmemmap pages associated with each HugeTLB @@ -211,7 +218,7 @@ extern bool hugetlb_free_vmemmap_enabled; */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { - if (!hugetlb_free_vmemmap_enabled) + if (!hugetlb_free_vmemmap_enabled()) return page; /* @@ -239,6 +246,11 @@ static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } + +static inline bool hugetlb_free_vmemmap_enabled(void) +{ + return false; +} #endif static __always_inline int page_is_fake_head(struct page *page) -- cgit v1.2.3 From e54084173487804f5e2f23facf107fd9336e637e Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:45:12 -0700 Subject: mm: sparsemem: move vmemmap related to HugeTLB to CONFIG_HUGETLB_PAGE_FREE_VMEMMAP The vmemmap_remap_free/alloc are relevant to HugeTLB, so move those functiongs to the scope of CONFIG_HUGETLB_PAGE_FREE_VMEMMAP. 
Link: https://lkml.kernel.org/r/20211101031651.75851-6-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Barry Song Cc: Bodeddula Balasubramaniam Cc: Chen Huang Cc: David Hildenbrand Cc: Fam Zheng Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qi Zheng Cc: Xiongchun Duan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 45a449e8c209..9d58321386cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3146,10 +3146,12 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, gfp_t gfp_mask); +#endif void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, -- cgit v1.2.3 From 824ddc601adc2cc48efb7f58b57997986c1c1276 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 22 Mar 2022 14:45:32 -0700 Subject: userfaultfd: provide unmasked address on page-fault Userfaultfd is supposed to provide the full address (i.e., unmasked) of the faulting access back to userspace. However, that is not the case for quite some time. Even running "userfaultfd_demo" from the userfaultfd man page provides the wrong output (and contradicts the man page). Notice that "UFFD_EVENT_PAGEFAULT event" shows the masked address (7fc5e30b3000) and not the first read address (0x7fc5e30b300f). Address returned by mmap() = 0x7fc5e30b3000 fault_handler_thread(): poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fc5e30b3000 (uffdio_copy.copy returned 4096) Read address 0x7fc5e30b300f in main(): A Read address 0x7fc5e30b340f in main(): A Read address 0x7fc5e30b380f in main(): A Read address 0x7fc5e30b3c0f in main(): A The exact address is useful for various reasons and specifically for prefetching decisions. If it is known that the memory is populated by certain objects whose size is not page-aligned, then based on the faulting address, the uffd-monitor can decide whether to prefetch and prefault the adjacent page. This bug has been for quite some time in the kernel: since commit 1a29d85eb0f1 ("mm: use vmf->address instead of of vmf->virtual_address") vmf->virtual_address"), which dates back to 2016. A concern has been raised that existing userspace application might rely on the old/wrong behavior in which the address is masked. Therefore, it was suggested to provide the masked address unless the user explicitly asks for the exact address. Add a new userfaultfd feature UFFD_FEATURE_EXACT_ADDRESS to direct userfaultfd to provide the exact address. Add a new "real_address" field to vmf to hold the unmasked address. Provide the address to userspace accordingly. Initialize real_address in various code-paths to be consistent with address, even when it is not used, to be on the safe side. 
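From the monitor's side, opting in is one more feature bit in the UFFDIO_API handshake, along these lines (minimal sketch):

    struct uffdio_api api = {
            .api = UFFD_API,
            .features = UFFD_FEATURE_EXACT_ADDRESS,
    };

    if (ioctl(uffd, UFFDIO_API, &api) == -1)
            err(1, "UFFDIO_API");
    if (!(api.features & UFFD_FEATURE_EXACT_ADDRESS))
            /* older kernel: msg.arg.pagefault.address stays page-aligned */;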
[namit@vmware.com: initialize real_address on all code paths, per Jan] Link: https://lkml.kernel.org/r/20220226022655.350562-1-namit@vmware.com [akpm@linux-foundation.org: fix typo in comment, per Jan] Link: https://lkml.kernel.org/r/20220218041003.3508-1-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Peter Xu Reviewed-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Jan Kara Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- include/uapi/linux/userfaultfd.h | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d58321386cc..0e4fd101616e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -478,7 +478,8 @@ struct vm_fault { struct vm_area_struct *vma; /* Target VMA */ gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ - unsigned long address; /* Faulting virtual address */ + unsigned long address; /* Faulting virtual address - masked */ + unsigned long real_address; /* Faulting virtual address - unmasked */ }; enum fault_flag flags; /* FAULT_FLAG_xxx flags * XXX: should really be 'const' */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 05b31d60acf6..ef739054cb1c 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -32,7 +32,8 @@ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM) + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -189,6 +190,10 @@ struct uffdio_api { * * UFFD_FEATURE_MINOR_SHMEM indicates the same support as * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. + * + * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page + * faults would be provided and the offset within the page would not be + * masked. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -201,6 +206,7 @@ struct uffdio_api { #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) #define UFFD_FEATURE_MINOR_SHMEM (1<<10) +#define UFFD_FEATURE_EXACT_ADDRESS (1<<11) __u64 features; __u64 ioctls; -- cgit v1.2.3 From b698f0a1773f7df73f2bb4bfe0e597ea1bb3881f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Mar 2022 14:45:38 -0700 Subject: mm/fs: delete PF_SWAPWRITE PF_SWAPWRITE has been redundant since v3.2 commit ee72886d8ed5 ("mm: vmscan: do not writeback filesystem pages in direct reclaim"). Coincidentally, NeilBrown's current patch "remove inode_congested()" deletes may_write_to_inode(), which appeared to be the one function which took notice of PF_SWAPWRITE. But if you study the old logic, and the conditions under which may_write_to_inode() was called, you discover that flag and function have been pointless for a decade. Link: https://lkml.kernel.org/r/75e80e7-742d-e3bd-531-614db8961e4@google.com Signed-off-by: Hugh Dickins Cc: NeilBrown Cc: Jan Kara Cc: "Darrick J. 
Wong" Cc: Dave Chinner Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..c3c841a02a00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1689,7 +1689,6 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -- cgit v1.2.3 From 89f6c88a6ab4a11deb14c270f7f1454cda4f73d6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Mar 2022 14:45:41 -0700 Subject: mm: __isolate_lru_page_prepare() in isolate_migratepages_block() __isolate_lru_page_prepare() conflates two unrelated functions, with the flags to one disjoint from the flags to the other; and hides some of the important checks outside of isolate_migratepages_block(), where the sequence is better to be visible. It comes from the days of lumpy reclaim, before compaction, when the combination made more sense. Move what's needed by mm/compaction.c isolate_migratepages_block() inline there, and what's needed by mm/vmscan.c isolate_lru_pages() inline there. Shorten "isolate_mode" to "mode", so the sequence of conditions is easier to read. Declare a "mapping" variable, to save one call to page_mapping() (but not another: calling again after page is locked is necessary). Simplify isolate_lru_pages() with a "move_to" list pointer. Link: https://lkml.kernel.org/r/879d62a8-91cc-d3c6-fb3b-69768236df68@google.com Signed-off-by: Hugh Dickins Acked-by: David Rientjes Reviewed-by: Alex Shi Cc: Alexander Duyck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3db431276d82..a246c137678e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -387,7 +387,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, -- cgit v1.2.3 From abd4349ff9b8d242376b67711254221f64f447c7 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 22 Mar 2022 14:45:56 -0700 Subject: mm: compaction: cleanup the compaction trace events As Steven suggested [1], we should access the pointers from the trace event to avoid dereferencing them to the tracepoint function when the tracepoint is disabled. 
[1] https://lkml.org/lkml/2021/11/3/409 Link: https://lkml.kernel.org/r/4cd393b4d57f8f01ed72c001509b28e3a3b1a8c1.1646985115.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: Steven Rostedt (Google) Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/compaction.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 7d48e7079e48..c6d5d70dc7a5 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -67,10 +67,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, #ifdef CONFIG_COMPACTION TRACE_EVENT(mm_compaction_migratepages, - TP_PROTO(unsigned long nr_all, + TP_PROTO(struct compact_control *cc, unsigned int nr_succeeded), - TP_ARGS(nr_all, nr_succeeded), + TP_ARGS(cc, nr_succeeded), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -79,7 +79,7 @@ TRACE_EVENT(mm_compaction_migratepages, TP_fast_assign( __entry->nr_migrated = nr_succeeded; - __entry->nr_failed = nr_all - nr_succeeded; + __entry->nr_failed = cc->nr_migratepages - nr_succeeded; ), TP_printk("nr_migrated=%lu nr_failed=%lu", @@ -88,10 +88,10 @@ TRACE_EVENT(mm_compaction_migratepages, ); TRACE_EVENT(mm_compaction_begin, - TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, - unsigned long free_pfn, unsigned long zone_end, bool sync), + TP_PROTO(struct compact_control *cc, unsigned long zone_start, + unsigned long zone_end, bool sync), - TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync), + TP_ARGS(cc, zone_start, zone_end, sync), TP_STRUCT__entry( __field(unsigned long, zone_start) @@ -103,8 +103,8 @@ TRACE_EVENT(mm_compaction_begin, TP_fast_assign( __entry->zone_start = zone_start; - __entry->migrate_pfn = migrate_pfn; - __entry->free_pfn = free_pfn; + __entry->migrate_pfn = cc->migrate_pfn; + __entry->free_pfn = cc->free_pfn; __entry->zone_end = zone_end; __entry->sync = sync; ), @@ -118,11 +118,11 @@ TRACE_EVENT(mm_compaction_begin, ); TRACE_EVENT(mm_compaction_end, - TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, - unsigned long free_pfn, unsigned long zone_end, bool sync, + TP_PROTO(struct compact_control *cc, unsigned long zone_start, + unsigned long zone_end, bool sync, int status), - TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status), + TP_ARGS(cc, zone_start, zone_end, sync, status), TP_STRUCT__entry( __field(unsigned long, zone_start) @@ -135,8 +135,8 @@ TRACE_EVENT(mm_compaction_end, TP_fast_assign( __entry->zone_start = zone_start; - __entry->migrate_pfn = migrate_pfn; - __entry->free_pfn = free_pfn; + __entry->migrate_pfn = cc->migrate_pfn; + __entry->free_pfn = cc->free_pfn; __entry->zone_end = zone_end; __entry->sync = sync; __entry->status = status; -- cgit v1.2.3 From 356ea3865687926e5da7579d1f3351d3f0a322a1 Mon Sep 17 00:00:00 2001 From: "andrew.yang" Date: Tue, 22 Mar 2022 14:46:08 -0700 Subject: mm/migrate: fix race between lock page and clear PG_Isolated When memory is tight, system may start to compact memory for large continuous memory demands. If one process tries to lock a memory page that is being locked and isolated for compaction, it may wait a long time or even forever. 
This is because compaction will perform non-atomic PG_Isolated clear while holding page lock, this may overwrite PG_waiters set by the process that can't obtain the page lock and add itself to the waiting queue to wait for the lock to be unlocked. CPU1 CPU2 lock_page(page); (successful) lock_page(); (failed) __ClearPageIsolated(page); SetPageWaiters(page) (may be overwritten) unlock_page(page); The solution is to not perform non-atomic operation on page flags while holding page lock. Link: https://lkml.kernel.org/r/20220315030515.20263-1-andrew.yang@mediatek.com Signed-off-by: andrew.yang Cc: Matthias Brugger Cc: Matthew Wilcox Cc: "Vlastimil Babka" Cc: David Howells Cc: "William Kucharski" Cc: David Hildenbrand Cc: Yang Shi Cc: Marc Zyngier Cc: Nicholas Tang Cc: Kuan-Ying Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 340cb8156568..88fe1d759cdd 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1000,7 +1000,7 @@ PAGE_TYPE_OPS(Guard, guard) extern bool is_free_buddy_page(struct page *page); -__PAGEFLAG(Isolated, isolated, PF_ANY); +PAGEFLAG(Isolated, isolated, PF_ANY); #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) -- cgit v1.2.3 From 27d121d0ec6d604d0147c5b579e4181b688a2d64 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 22 Mar 2022 14:46:14 -0700 Subject: mm/cma: provide option to opt out from exposing pages on activation failure Patch series "powerpc/fadump: handle CMA activation failure appropriately", v3. Commit 072355c1cf2d ("mm/cma: expose all pages to the buddy if activation of an area fails") started exposing all pages to buddy allocator on CMA activation failure. But there can be CMA users that want to handle the reserved memory differently on CMA allocation failure. Provide an option to opt out from exposing pages to buddy for such cases. Link: https://lkml.kernel.org/r/20220117075246.36072-1-hbathini@linux.ibm.com Link: https://lkml.kernel.org/r/20220117075246.36072-2-hbathini@linux.ibm.com Signed-off-by: Hari Bathini Reviewed-by: David Hildenbrand Cc: Oscar Salvador Cc: Mike Kravetz Cc: Mahesh Salgaonkar Cc: Sourabh Jain Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index b1ba94f1cc9c..90fd742fd1ef 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -58,4 +58,6 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); + +extern void cma_reserve_pages_on_error(struct cma *cma); #endif -- cgit v1.2.3 From e39bb6be9f2b39a6dbaeff484361de76021b175d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 22 Mar 2022 14:46:20 -0700 Subject: NUMA Balancing: add page promotion counter Patch series "NUMA balancing: optimize memory placement for memory tiering system", v13 With the advent of various new memory types, some machines will have multiple types of memory, e.g. DRAM and PMEM (persistent memory). The memory subsystem of these machines can be called memory tiering system, because the performance of the different types of memory are different. 
After commit c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM"), the PMEM could be used as cost-effective volatile memory in separate NUMA nodes. In a typical memory tiering system, there are CPUs, DRAM and PMEM in each physical NUMA node. The CPUs and the DRAM will be put in one logical node, while the PMEM will be put in another (faked) logical node. To optimize the overall system performance, the hot pages should be placed in the DRAM node. To do that, we need to identify the hot pages in the PMEM node and migrate them to the DRAM node via NUMA migration. In the original NUMA balancing, there is already a set of existing mechanisms to identify the pages recently accessed by the CPUs in a node and migrate those pages to that node. So we can reuse these mechanisms to optimize the page placement in the memory tiering system. This is implemented in this patchset. On the other hand, the cold pages should be placed in the PMEM node. So, we also need to identify the cold pages in the DRAM node and migrate them to the PMEM node. In commit 26aa2d199d6f ("mm/migrate: demote pages during reclaim"), a mechanism to demote the cold DRAM pages to the PMEM node under memory pressure is implemented. Based on that, the cold DRAM pages can be demoted to the PMEM node proactively to free some memory space on the DRAM node to accommodate the promoted hot PMEM pages. This is implemented in this patchset too. We have tested the solution with the pmbench memory accessing benchmark with the 80:20 read/write ratio and the Gauss access address distribution on a 2 socket Intel server with Optane DC Persistent Memory Model. The test results show that the pmbench score can improve up to 95.9%. This patch (of 3): In a system with multiple memory types, e.g. DRAM and PMEM, the CPU and DRAM in one socket will be put in one NUMA node as before, while the PMEM will be put in another NUMA node as described in the description of commit c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM"). So, the NUMA balancing mechanism will identify all PMEM accesses as remote accesses and try to promote the PMEM pages to DRAM. To distinguish the number of inter-type promoted pages from that of inter-socket migrated pages, a new vmstat counter is added. The counter is per-node (counted in the target node). So this can be used to identify promotion imbalance among the NUMA nodes.
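The new counter then surfaces through the usual per-node vmstat interface, along these lines (the exact counter string is assumed to follow the usual lower-cased enum name):

    # grep pgpromote /sys/devices/system/node/node0/vmstat
    pgpromote_success 12345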
Link: https://lkml.kernel.org/r/20220301085329.3210428-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20220221084529.1052339-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20220221084529.1052339-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Tested-by: Baolin Wang Reviewed-by: Baolin Wang Acked-by: Johannes Weiner Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Rik van Riel Cc: Mel Gorman Cc: Peter Zijlstra Cc: Dave Hansen Cc: Zi Yan Cc: Wei Xu Cc: Shakeel Butt Cc: zhongjiang-ali Cc: Feng Tang Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ include/linux/node.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c9e6a50109b9..310b6e7ce58a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -221,6 +221,9 @@ enum node_stat_item { NR_PAGETABLE, /* used for pagetables */ #ifdef CONFIG_SWAP NR_SWAPCACHE, +#endif +#ifdef CONFIG_NUMA_BALANCING + PGPROMOTE_SUCCESS, /* promote successfully */ #endif NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/node.h b/include/linux/node.h index bb21fd631b16..81bbf1c0afd3 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -181,4 +181,9 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, #define to_node(device) container_of(device, struct node, dev) +static inline bool node_is_toptier(int node) +{ + return node_state(node, N_CPU); +} + #endif /* _LINUX_NODE_H_ */ -- cgit v1.2.3 From c574bbe917036c8968b984c82c7b13194fe5ce98 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 22 Mar 2022 14:46:23 -0700 Subject: NUMA balancing: optimize page placement for memory tiering system With the advent of various new memory types, some machines will have multiple types of memory, e.g. DRAM and PMEM (persistent memory). The memory subsystem of these machines can be called memory tiering system, because the performance of the different types of memory are usually different. In such system, because of the memory accessing pattern changing etc, some pages in the slow memory may become hot globally. So in this patch, the NUMA balancing mechanism is enhanced to optimize the page placement among the different memory types according to hot/cold dynamically. In a typical memory tiering system, there are CPUs, fast memory and slow memory in each physical NUMA node. The CPUs and the fast memory will be put in one logical node (called fast memory node), while the slow memory will be put in another (faked) logical node (called slow memory node). That is, the fast memory is regarded as local while the slow memory is regarded as remote. So it's possible for the recently accessed pages in the slow memory node to be promoted to the fast memory node via the existing NUMA balancing mechanism. The original NUMA balancing mechanism will stop to migrate pages if the free memory of the target node becomes below the high watermark. This is a reasonable policy if there's only one memory type. But this makes the original NUMA balancing mechanism almost do not work to optimize page placement among different memory types. Details are as follows. It's the common cases that the working-set size of the workload is larger than the size of the fast memory nodes. Otherwise, it's unnecessary to use the slow memory at all. 
So, there are almost always not enough free pages in the fast memory nodes, so that the globally hot pages in the slow memory node cannot be promoted to the fast memory node. To solve the issue, we have two choices, as follows: a. Ignore the free pages watermark checking when promoting hot pages from the slow memory node to the fast memory node. This will create some memory pressure in the fast memory node, thus triggering memory reclaim, so that the cold pages in the fast memory node will be demoted to the slow memory node. b. Define a new watermark called wmark_promo which is higher than wmark_high, and have kswapd reclaim pages until free pages reach that watermark. The scenario is as follows: when we want to promote hot pages from a slow memory to a fast memory, but the fast memory's free pages would go lower than the high watermark with such promotion, we wake up kswapd with the wmark_promo watermark in order to demote cold pages and free up some space. So, next time we want to promote hot pages we might have a chance of doing so. Choice "a" may create high memory pressure in the fast memory node. If the memory pressure of the workload is high, the memory pressure may become so high that the memory allocation latency of the workload is affected, e.g. direct reclaim may be triggered. Choice "b" works much better in this respect. If the memory pressure of the workload is high, the hot page promotion will stop earlier because its allocation watermark is higher than that of the normal memory allocation. So in this patch, choice "b" is implemented. A new zone watermark (WMARK_PROMO) is added, which is larger than the high watermark and can be controlled via watermark_scale_factor. In addition to the original page placement optimization among sockets, the NUMA balancing mechanism is extended to optimize page placement according to hot/cold among different memory types. So the sysctl user space interface (numa_balancing) is extended in a backward compatible way as follows, so that users can enable/disable this functionality individually. The sysctl is converted from a Boolean value to a bit field. The definition of the flags is: - 0: NUMA_BALANCING_DISABLED - 1: NUMA_BALANCING_NORMAL - 2: NUMA_BALANCING_MEMORY_TIERING We have tested the patch with the pmbench memory accessing benchmark with the 80:20 read/write ratio and the Gauss access address distribution on a 2 socket Intel server with Optane DC Persistent Memory Model. The test results show that the pmbench score can improve up to 95.9%. Thanks to Andrew Morton for helping fix the document format error.
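Usage is then a matter of writing the desired bit mask, per the flag definitions above:

    # classic NUMA balancing only (previous behaviour of "1")
    echo 1 > /proc/sys/kernel/numa_balancing
    # classic balancing plus hot-page promotion on tiered memory (NORMAL | MEMORY_TIERING)
    echo 3 > /proc/sys/kernel/numa_balancing
    # disable everything
    echo 0 > /proc/sys/kernel/numa_balancing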
Link: https://lkml.kernel.org/r/20220221084529.1052339-3-ying.huang@intel.com Signed-off-by: "Huang, Ying" Tested-by: Baolin Wang Reviewed-by: Baolin Wang Acked-by: Johannes Weiner Reviewed-by: Oscar Salvador Reviewed-by: Yang Shi Cc: Michal Hocko Cc: Rik van Riel Cc: Mel Gorman Cc: Peter Zijlstra Cc: Dave Hansen Cc: Zi Yan Cc: Wei Xu Cc: Shakeel Butt Cc: zhongjiang-ali Cc: Randy Dunlap Cc: Feng Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + include/linux/sched/sysctl.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 310b6e7ce58a..962b14d403e8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -353,6 +353,7 @@ enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, + WMARK_PROMO, NR_WMARK }; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c19dd5a2c05c..b5eec8854c5a 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -23,6 +23,16 @@ enum sched_tunable_scaling { SCHED_TUNABLESCALING_END, }; +#define NUMA_BALANCING_DISABLED 0x0 +#define NUMA_BALANCING_NORMAL 0x1 +#define NUMA_BALANCING_MEMORY_TIERING 0x2 + +#ifdef CONFIG_NUMA_BALANCING +extern int sysctl_numa_balancing_mode; +#else +#define sysctl_numa_balancing_mode 0 +#endif + /* * control realtime throttling: * -- cgit v1.2.3 From 4d45c3aff5ebf80d329eba0f90544d20224f612d Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Tue, 22 Mar 2022 14:46:33 -0700 Subject: mm/vmstat: add event for ksm swapping in copy When faults in from swap what used to be a KSM page and that page had been swapped in before, system has to make a copy, and leaves remerging the pages to a later pass of ksmd. That is not good for performace, we'd better to reduce this kind of copy. There are some ways to reduce it, for example lessen swappiness or madvise(, , MADV_MERGEABLE) range. So add this event to support doing this tuning. Just like this patch: "mm, THP, swap: add THP swapping out fallback counting". Link: https://lkml.kernel.org/r/20220113023839.758845-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reviewed-by: Ran Xiaokai Cc: Hugh Dickins Cc: Yang Shi Cc: Dave Hansen Cc: Saravanan D Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 7b2363388bfa..16a0a4fd000b 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -129,6 +129,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, +#ifdef CONFIG_KSM + KSM_SWPIN_COPY, +#endif #endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, -- cgit v1.2.3 From e930d999715073a70d306fb59a394ea8b84d0b45 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 22 Mar 2022 14:46:51 -0700 Subject: mm, memory_hotplug: make arch_alloc_nodedata independent on CONFIG_MEMORY_HOTPLUG Patch series "mm, memory_hotplug: handle unitialized numa node gracefully". The core of the fix is patch 2 which also links existing bug reports. The high level goal is to have all possible numa nodes have their pgdat allocated and initialized so for_each_possible_node(nid) NODE_DATA(nid) will never return garbage. 
This has proven to be problem in several places when an offline numa node is used for an allocation just to realize that node_data and therefore allocation fallback zonelists are not initialized and such an allocation request blows up. There were attempts to address that by checking node_online in several places including the page allocator. This patchset approaches the problem from a different perspective and instead of special casing, which just adds a runtime overhead, it allocates pglist_data for each possible node. This can add some memory overhead for platforms with high number of possible nodes if they do not contain any memory. This should be a rather rare configuration though. How to test this? David has provided and excellent howto: http://lkml.kernel.org/r/6e5ebc19-890c-b6dd-1924-9f25c441010d@redhat.com Patches 1 and 3-6 are mostly cleanups. The patchset has been reviewed by Rafael (thanks!) and the core fix tested by Rafael and Alexey (thanks to both). David has tested as per instructions above and hasn't found any fallouts in the memory hotplug scenarios. This patch (of 6): This is a preparatory patch and it doesn't introduce any functional change. It merely pulls out arch_alloc_nodedata (and co) outside of CONFIG_MEMORY_HOTPLUG because the following patch will need to call this from the generic MM code. Link: https://lkml.kernel.org/r/20220127085305.20890-1-mhocko@kernel.org Link: https://lkml.kernel.org/r/20220127085305.20890-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Rafael Aquini Acked-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Oscar Salvador Reviewed-by: Wei Yang Cc: Alexey Makhalov Cc: Christoph Lameter Cc: Dennis Zhou Cc: Eric Dumazet Cc: Nico Pache Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 119 ++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index be48e003a518..4355983b364d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,6 +16,65 @@ struct memory_group; struct resource; struct vmem_altmap; +#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION +/* + * For supporting node-hotadd, we have to allocate a new pgdat. + * + * If an arch has generic style NODE_DATA(), + * node_data[nid] = kzalloc() works well. But it depends on the architecture. + * + * In general, generic_alloc_nodedata() is used. + * Now, arch_free_nodedata() is just defined for error path of node_hot_add. + * + */ +extern pg_data_t *arch_alloc_nodedata(int nid); +extern void arch_free_nodedata(pg_data_t *pgdat); +extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); + +#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + +#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) +#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat) + +#ifdef CONFIG_NUMA +/* + * XXX: node aware allocation can't work well to get new node's memory at this time. + * Because, pgdat for the new node is not allocated/initialized yet itself. + * To use new node's memory, more consideration will be necessary. + */ +#define generic_alloc_nodedata(nid) \ +({ \ + kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ +}) +/* + * This definition is just for error path in node hotadd. + * For node hotremove, we have to replace this. 
+ */ +#define generic_free_nodedata(pgdat) kfree(pgdat) + +extern pg_data_t *node_data[]; +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + node_data[nid] = pgdat; +} + +#else /* !CONFIG_NUMA */ + +/* never called */ +static inline pg_data_t *generic_alloc_nodedata(int nid) +{ + BUG(); + return NULL; +} +static inline void generic_free_nodedata(pg_data_t *pgdat) +{ +} +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); @@ -154,66 +213,6 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ -#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION -/* - * For supporting node-hotadd, we have to allocate a new pgdat. - * - * If an arch has generic style NODE_DATA(), - * node_data[nid] = kzalloc() works well. But it depends on the architecture. - * - * In general, generic_alloc_nodedata() is used. - * Now, arch_free_nodedata() is just defined for error path of node_hot_add. - * - */ -extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_free_nodedata(pg_data_t *pgdat); -extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); - -#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - -#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) -#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat) - -#ifdef CONFIG_NUMA -/* - * If ARCH_HAS_NODEDATA_EXTENSION=n, this func is used to allocate pgdat. - * XXX: kmalloc_node() can't work well to get new node's memory at this time. - * Because, pgdat for the new node is not allocated/initialized yet itself. - * To use new node's memory, more consideration will be necessary. - */ -#define generic_alloc_nodedata(nid) \ -({ \ - kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ -}) -/* - * This definition is just for error path in node hotadd. - * For node hotremove, we have to replace this. - */ -#define generic_free_nodedata(pgdat) kfree(pgdat) - -extern pg_data_t *node_data[]; -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ - node_data[nid] = pgdat; -} - -#else /* !CONFIG_NUMA */ - -/* never called */ -static inline pg_data_t *generic_alloc_nodedata(int nid) -{ - BUG(); - return NULL; -} -static inline void generic_free_nodedata(pg_data_t *pgdat) -{ -} -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - void get_online_mems(void); void put_online_mems(void); -- cgit v1.2.3 From 09f49dca570a917a8c6bccd7e8c61f5141534e3a Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 22 Mar 2022 14:46:54 -0700 Subject: mm: handle uninitialized numa nodes gracefully We have had several reports [1][2][3] that page allocator blows up when an allocation from a possible node is requested. The underlying reason is that NODE_DATA for the specific node is not allocated. NUMA specific initialization is arch specific and it can vary a lot. E.g. x86 tries to initialize all nodes that have some cpu affinity (see init_cpu_to_node) but this can be insufficient because the node might be cpuless for example. One way to address this problem would be to check for !node_online nodes when trying to get a zonelist and silently fall back to another node. That is unfortunately adding a branch into allocator hot path and it doesn't handle any other potential NODE_DATA users. 
This patch takes a different approach (following a lead of [3]) and it pre allocates pgdat for all possible nodes in an arch indipendent code - free_area_init. All uninitialized nodes are treated as memoryless nodes. node_state of the node is not changed because that would lead to other side effects - e.g. sysfs representation of such a node and from past discussions [4] it is known that some tools might have problems digesting that. Newly allocated pgdat only gets a minimal initialization and the rest of the work is expected to be done by the memory hotplug - hotadd_new_pgdat (renamed to hotadd_init_pgdat). generic_alloc_nodedata is changed to use the memblock allocator because neither page nor slab allocators are available at the stage when all pgdats are allocated. Hotplug doesn't allocate pgdat anymore so we can use the early boot allocator. The only arch specific implementation is ia64 and that is changed to use the early allocator as well. [1] http://lkml.kernel.org/r/20211101201312.11589-1-amakhalov@vmware.com [2] http://lkml.kernel.org/r/20211207224013.880775-1-npache@redhat.com [3] http://lkml.kernel.org/r/20190114082416.30939-1-mhocko@kernel.org [4] http://lkml.kernel.org/r/20200428093836.27190-1-srikar@linux.vnet.ibm.com [akpm@linux-foundation.org: replace comment, per Mike] Link: https://lkml.kernel.org/r/Yfe7RBeLCijnWBON@dhcp22.suse.cz Reported-by: Alexey Makhalov Tested-by: Alexey Makhalov Reported-by: Nico Pache Acked-by: Rafael Aquini Tested-by: Rafael Aquini Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Mike Rapoport Signed-off-by: Michal Hocko Cc: Christoph Lameter Cc: Dennis Zhou Cc: Eric Dumazet Cc: Tejun Heo Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4355983b364d..cdd66bfdf855 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -44,7 +44,7 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); */ #define generic_alloc_nodedata(nid) \ ({ \ - kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ + memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ }) /* * This definition is just for error path in node hotadd. -- cgit v1.2.3 From 390511e1476eb1cc41d420a7661b33f4d8584c3f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 22 Mar 2022 14:46:57 -0700 Subject: mm, memory_hotplug: drop arch_free_nodedata Prior to "mm: handle uninitialized numa nodes gracefully" memory hotplug used to allocate pgdat when memory has been added to a node (hotadd_init_pgdat) arch_free_nodedata has been only used in the failure path because once the pgdat is exported (to be visible by NODA_DATA(nid)) it cannot really be freed because there is no synchronization available for that. pgdat is allocated for each possible nodes now so the memory hotplug doesn't need to do the ever use arch_free_nodedata so drop it. This patch doesn't introduce any functional change. 
Link: https://lkml.kernel.org/r/20220127085305.20890-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Rafael Aquini Acked-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Oscar Salvador Cc: Alexey Makhalov Cc: Christoph Lameter Cc: Dennis Zhou Cc: Eric Dumazet Cc: Nico Pache Cc: Tejun Heo Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index cdd66bfdf855..60f09d3ebb3d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -24,17 +24,14 @@ struct vmem_altmap; * node_data[nid] = kzalloc() works well. But it depends on the architecture. * * In general, generic_alloc_nodedata() is used. - * Now, arch_free_nodedata() is just defined for error path of node_hot_add. * */ extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_free_nodedata(pg_data_t *pgdat); extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); #else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ #define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) -#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat) #ifdef CONFIG_NUMA /* -- cgit v1.2.3 From 70b5b46a754245d383811b4d2f2c76c34bb7e145 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 22 Mar 2022 14:47:00 -0700 Subject: mm, memory_hotplug: reorganize new pgdat initialization When a !node_online node is brought up it needs a hotplug specific initialization because the node could be either uninitialized yet or it could have been recycled after previous hotremove. hotadd_init_pgdat is responsible for that. Internal pgdat state is initialized at two places currently - hotadd_init_pgdat - free_area_init_core_hotplug There is no real clear cut what should go where but this patch's chosen to move the whole internal state initialization into free_area_init_core_hotplug. hotadd_init_pgdat is still responsible to pull all the parts together - most notably to initialize zonelists because those depend on the overall topology. This patch doesn't introduce any functional change. 
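A rough sketch of the resulting split (not the actual mm/memory_hotplug.c code; the exact helpers and error handling differ):

    static pg_data_t *hotadd_init_pgdat(int nid)
    {
            struct pglist_data *pgdat = NODE_DATA(nid);    /* preallocated for every possible node */

            /* reset/initialise the internal pgdat state */
            free_area_init_core_hotplug(pgdat);
            /* pull the pieces together: zonelists depend on the overall topology */
            build_all_zonelists(pgdat);

            return pgdat;
    }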
Link: https://lkml.kernel.org/r/20220127085305.20890-5-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Rafael Aquini Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Alexey Makhalov Cc: Christoph Lameter Cc: Dennis Zhou Cc: Eric Dumazet Cc: Mike Rapoport Cc: Nico Pache Cc: Tejun Heo Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 60f09d3ebb3d..76bf2de86def 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -319,7 +319,7 @@ extern void set_zone_contiguous(struct zone *zone); extern void clear_zone_contiguous(struct zone *zone); #ifdef CONFIG_MEMORY_HOTPLUG -extern void __ref free_area_init_core_hotplug(int nid); +extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory_resource(int nid, struct resource *resource, -- cgit v1.2.3 From 2848a28b0a6052a4c8450397d2647d7d8e3f6f06 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:47:13 -0700 Subject: drivers/base/node: consolidate node device subsystem initialization in node_dev_init() ... and call node_dev_init() after memory_dev_init() from driver_init(), so before any of the existing arch/subsys calls. All online nodes should be known at that point: early during boot, arch code determines node and zone ranges and sets the relevant nodes online; usually this happens in setup_arch(). This is in line with memory_dev_init(), which initializes the memory device subsystem and creates all memory block devices. Similar to memory_dev_init(), panic() if anything goes wrong, we don't want to continue with such basic initialization errors. The important part is that node_dev_init() gets called after memory_dev_init() and after cpu_dev_init(), but before any of the relevant archs call register_cpu() to register the new cpu device under the node device. The latter should be the case for the current users of topology_init(). Link: https://lkml.kernel.org/r/20220203105212.30385-1-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Tested-by: Anatoly Pugachev (sparc64) Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: Oscar Salvador Cc: Mike Rapoport Cc: Catalin Marinas Cc: Will Deacon Cc: Thomas Bogendoerfer Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/node.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 81bbf1c0afd3..7f876d48af11 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -112,6 +112,7 @@ static inline void link_mem_sections(int nid, unsigned long start_pfn, extern void unregister_node(struct node *node); #ifdef CONFIG_NUMA +extern void node_dev_init(void); /* Core of the node registration - only memory hotplug should use this */ extern int __register_one_node(int nid); @@ -149,6 +150,9 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister, node_registration_func_t unregister); #endif #else +static inline void node_dev_init(void) +{ +} static inline int __register_one_node(int nid) { return 0; -- cgit v1.2.3 From cc6515591b25f08ce199e9379844a964f52a27f2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:47:28 -0700 Subject: drivers/base/node: rename link_mem_sections() to register_memory_block_under_node() Patch series "drivers/base/memory: determine and store zone for single-zone memory blocks", v2. I remember talking to Michal in the past about removing test_pages_in_a_zone(), which we use for: * verifying that a memory block we intend to offline is really only managed by a single zone. We don't support offlining of memory blocks that are managed by multiple zones (e.g., multiple nodes, DMA and DMA32) * exposing that zone to user space via /sys/devices/system/memory/memory*/valid_zones Now that I identified some more cases where test_pages_in_a_zone() might go wrong, and we received an UBSAN report (see patch #3), let's get rid of this PFN walker. So instead of detecting the zone at runtime with test_pages_in_a_zone() by scanning the memmap, let's determine and remember for each memory block if it's managed by a single zone. The stored zone can then be used for the above two cases, avoiding a manual lookup using test_pages_in_a_zone(). This avoids eventually stumbling over uninitialized memmaps in corner cases, especially when ZONE_DEVICE ranges partly fall into memory block (that are responsible for managing System RAM). Handling memory onlining is easy, because we online to exactly one zone. Handling boot memory is more tricky, because we want to avoid scanning all zones of all nodes to detect possible zones that overlap with the physical memory region of interest. Fortunately, we already have code that determines the applicable nodes for a memory block, to create sysfs links -- we'll hook into that. Patch #1 is a simple cleanup I had laying around for a longer time. Patch #2 contains the main logic to remove test_pages_in_a_zone() and further details. [1] https://lkml.kernel.org/r/20220128144540.153902-1-david@redhat.com [2] https://lkml.kernel.org/r/20220203105212.30385-1-david@redhat.com This patch (of 2): Let's adjust the stale terminology, making it match unregister_memory_block_under_nodes() and do_register_memory_block_under_node(). We're dealing with memory block devices, which span 1..X memory sections. Link: https://lkml.kernel.org/r/20220210184359.235565-1-david@redhat.com Link: https://lkml.kernel.org/r/20220210184359.235565-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Oscar Salvador Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: "Rafael J. 
Wysocki" Cc: Rafael Parra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/node.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 7f876d48af11..40d641a8bfb0 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,13 +99,13 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) -void link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context); +void register_memory_blocks_under_node(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else -static inline void link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context) { } #endif @@ -129,8 +129,8 @@ static inline int register_one_node(int nid) error = __register_one_node(nid); if (error) return error; - /* link memory sections under this node */ - link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY); + register_memory_blocks_under_node(nid, start_pfn, end_pfn, + MEMINIT_EARLY); } return error; -- cgit v1.2.3 From 395f6081bad49f9c54abafebab49ee23aa985bbd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:47:31 -0700 Subject: drivers/base/memory: determine and store zone for single-zone memory blocks test_pages_in_a_zone() is just another nasty PFN walker that can easily stumble over ZONE_DEVICE memory ranges falling into the same memory block as ordinary system RAM: the memmap of parts of these ranges might possibly be uninitialized. In fact, we observed (on an older kernel) with UBSAN: UBSAN: Undefined behaviour in ./include/linux/mm.h:1133:50 index 7 is out of range for type 'zone [5]' CPU: 121 PID: 35603 Comm: read_all Kdump: loaded Tainted: [...] Hardware name: Dell Inc. PowerEdge R7425/08V001, BIOS 1.12.2 11/15/2019 Call Trace: dump_stack+0x9a/0xf0 ubsan_epilogue+0x9/0x7a __ubsan_handle_out_of_bounds+0x13a/0x181 test_pages_in_a_zone+0x3c4/0x500 show_valid_zones+0x1fa/0x380 dev_attr_show+0x43/0xb0 sysfs_kf_seq_show+0x1c5/0x440 seq_read+0x49d/0x1190 vfs_read+0xff/0x300 ksys_read+0xb8/0x170 do_syscall_64+0xa5/0x4b0 entry_SYSCALL_64_after_hwframe+0x6a/0xdf RIP: 0033:0x7f01f4439b52 We seem to stumble over a memmap that contains a garbage zone id. While we could try inserting pfn_to_online_page() calls, it will just make memory offlining slower, because we use test_pages_in_a_zone() to make sure we're offlining pages that all belong to the same zone. Let's just get rid of this PFN walker and determine the single zone of a memory block -- if any -- for early memory blocks during boot. For memory onlining, we know the single zone already. Let's avoid any additional memmap scanning and just rely on the zone information available during boot. For memory hot(un)plug, we only really care about memory blocks that: * span a single zone (and, thereby, a single node) * are completely System RAM (IOW, no holes, no ZONE_DEVICE) If one of these conditions is not met, we reject memory offlining. Hotplugged memory blocks (starting out offline), always meet both conditions. 
There are three scenarios to handle: (1) Memory hot(un)plug A memory block with zone == NULL cannot be offlined, corresponding to our previous test_pages_in_a_zone() check. After successful memory onlining/offlining, we simply set the zone accordingly. * Memory onlining: set the zone we just used for onlining * Memory offlining: set zone = NULL So a hotplugged memory block starts with zone = NULL. Once memory onlining is done, we set the proper zone. (2) Boot memory with !CONFIG_NUMA We know that there is just a single pgdat, so we simply scan all zones of that pgdat for an intersection with our memory block PFN range when adding the memory block. If more than one zone intersects (e.g., DMA and DMA32 on x86 for the first memory block) we set zone = NULL and consequently mimic what test_pages_in_a_zone() used to do. (3) Boot memory with CONFIG_NUMA At the point in time we create the memory block devices during boot, we don't know yet which nodes *actually* span a memory block. While we could scan all zones of all nodes for intersections, overlapping nodes complicate the situation and scanning all nodes is possibly expensive. But that problem has already been solved by the code that sets the node of a memory block and creates the link in the sysfs -- do_register_memory_block_under_node(). So, we hook into the code that sets the node id for a memory block. If we already have a different node id set for the memory block, we know that multiple nodes *actually* have PFNs falling into our memory block: we set zone = NULL and consequently mimic what test_pages_in_a_zone() used to do. If there is no node id set, we do the same as (2) for the given node. Note that the call order in driver_init() is: -> memory_dev_init(): create memory block devices -> node_dev_init(): link memory block devices to the node and set the node id So in summary, we detect if there is a single zone responsible for this memory block and we consequently store the zone in that case in the memory block, updating it during memory onlining/offlining. Link: https://lkml.kernel.org/r/20220210184359.235565-3-david@redhat.com Signed-off-by: David Hildenbrand Reported-by: Rafael Parra Reviewed-by: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: Rafael Parra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 12 ++++++++++++ include/linux/memory_hotplug.h | 6 ++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 88eb587b5143..aa619464a1df 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -70,6 +70,13 @@ struct memory_block { unsigned long state; /* serialized by the dev->lock */ int online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ + /* + * The single zone of this memory block if all PFNs of this memory block + * that are System RAM (not a memory hole, not ZONE_DEVICE ranges) are + * managed by a single zone. NULL if multiple zones (including nodes) + * apply. + */ + struct zone *zone; struct device dev; /* * Number of vmemmap pages. 
These pages @@ -161,6 +168,11 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, }) #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) + +#ifdef CONFIG_NUMA +void memory_block_add_nid(struct memory_block *mem, int nid, + enum meminit_context context); +#endif /* CONFIG_NUMA */ #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 76bf2de86def..1ce6f8044f1e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -163,8 +163,6 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); -extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, - unsigned long end_pfn); extern void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn); @@ -293,7 +291,7 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages, - struct memory_group *group); + struct zone *zone, struct memory_group *group); extern int remove_memory(u64 start, u64 size); extern void __remove_memory(u64 start, u64 size); extern int offline_and_remove_memory(u64 start, u64 size); @@ -302,7 +300,7 @@ extern int offline_and_remove_memory(u64 start, u64 size); static inline void try_offline_node(int nid) {} static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages, - struct memory_group *group) + struct zone *zone, struct memory_group *group) { return -EINVAL; } -- cgit v1.2.3 From 734c15700cdf9062ae98d8b131c6fe873dfad26d Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 22 Mar 2022 14:47:37 -0700 Subject: mm: only re-generate demotion targets when a numa node changes its N_CPU state Abhishek reported that after patch [1], hotplug operations are taking roughly double the expected time. [2] The reason behind this is that the CPU callbacks that migrate_on_reclaim_init() sets always call set_migration_target_nodes() whenever a CPU is brought up/down. But we only care about numa nodes going from having cpus to becoming cpuless, and vice versa, as that influences the demotion_target order. We do already have two CPU callbacks (vmstat_cpu_online() and vmstat_cpu_dead()) that check exactly that, so get rid of the CPU callbacks in migrate_on_reclaim_init() and only call set_migration_target_nodes() from vmstat_cpu_{dead,online}() whenever a numa node changes its N_CPU state. 
[1] https://lore.kernel.org/linux-mm/20210721063926.3024591-2-ying.huang@intel.com/ [2] https://lore.kernel.org/linux-mm/eb438ddd-2919-73d4-bd9f-b7eecdd9577a@linux.vnet.ibm.com/ [osalvador@suse.de: add feedback from Huang Ying] Link: https://lkml.kernel.org/r/20220314150945.12694-1-osalvador@suse.de Link: https://lkml.kernel.org/r/20220310120749.23077-1-osalvador@suse.de Fixes: 884a6e5d1f93b ("mm/migrate: update node demotion order on hotplug events") Signed-off-by: Oscar Salvador Reviewed-by: Baolin Wang Tested-by: Baolin Wang Reported-by: Abhishek Goel Cc: Dave Hansen Cc: "Huang, Ying" Cc: Abhishek Goel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index db96e10eb8da..90e75d5a54d6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); extern bool numa_demotion_enabled; +extern void migrate_on_reclaim_init(void); +#ifdef CONFIG_HOTPLUG_CPU +extern void set_migration_target_nodes(void); #else +static inline void set_migration_target_nodes(void) {} +#endif +#else + +static inline void set_migration_target_nodes(void) {} static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, -- cgit v1.2.3 From 6eada26ffc80bfe1f2db088be0c44ec82b5cd3dc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Mar 2022 14:47:46 -0700 Subject: mm: remove usercopy_warn() Users of usercopy_warn() were removed by commit 53944f171a89 ("mm: remove HARDENED_USERCOPY_FALLBACK") Remove it. 
Link: https://lkml.kernel.org/r/5f26643fc70b05f8455b60b99c30c17d635fa640.1644231910.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Reviewed-by: Miaohe Lin Reviewed-by: Stephen Kitt Reviewed-by: Muchun Song Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ac0394087f7d..bca27b4e5eb2 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -401,8 +401,6 @@ static inline void user_access_restore(unsigned long flags) { } #endif #ifdef CONFIG_HARDENED_USERCOPY -void usercopy_warn(const char *name, const char *detail, bool to_user, - unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); -- cgit v1.2.3 From ad7489d5262d2aa775b5e5a1782793925fa90065 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Mar 2022 14:47:49 -0700 Subject: mm: uninline copy_overflow() While building a small config with CONFIG_CC_OPTIMISE_FOR_SIZE, I ended up with more than 50 times the following function in vmlinux because GCC doesn't honor the 'inline' keyword: c00243bc : c00243bc: 94 21 ff f0 stwu r1,-16(r1) c00243c0: 7c 85 23 78 mr r5,r4 c00243c4: 7c 64 1b 78 mr r4,r3 c00243c8: 3c 60 c0 62 lis r3,-16286 c00243cc: 7c 08 02 a6 mflr r0 c00243d0: 38 63 5e e5 addi r3,r3,24293 c00243d4: 90 01 00 14 stw r0,20(r1) c00243d8: 4b ff 82 45 bl c001c61c <__warn_printk> c00243dc: 0f e0 00 00 twui r0,0 c00243e0: 80 01 00 14 lwz r0,20(r1) c00243e4: 38 21 00 10 addi r1,r1,16 c00243e8: 7c 08 03 a6 mtlr r0 c00243ec: 4e 80 00 20 blr With -Winline, GCC tells: /include/linux/thread_info.h:212:20: warning: inlining failed in call to 'copy_overflow': call is unlikely and code size would grow [-Winline] copy_overflow() is a non conditional warning called by check_copy_size() on an error path. check_copy_size() have to remain inlined in order to benefit from constant folding, but copy_overflow() is not worth inlining. Uninline the warning when CONFIG_BUG is selected. When CONFIG_BUG is not selected, WARN() does nothing so skip it. This reduces the size of vmlinux by almost 4kbytes. Link: https://lkml.kernel.org/r/e1723b9cfa924bcefcd41f69d0025b38e4c9364e.1644819985.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: David Laight Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 73a6f34b3847..9f392ec76f2b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -209,9 +209,12 @@ __bad_copy_from(void); extern void __compiletime_error("copy destination size is too small") __bad_copy_to(void); +void __copy_overflow(int size, unsigned long count); + static inline void copy_overflow(int size, unsigned long count) { - WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); + if (IS_ENABLED(CONFIG_BUG)) + __copy_overflow(size, count); } static __always_inline __must_check bool -- cgit v1.2.3 From d7ca25c53e25a9a628aaa19b5a031f115d8c353d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 22 Mar 2022 14:47:58 -0700 Subject: highmem: document kunmap_local() Some users of kmap() add an offset to the kmap() address to be used during the mapping. 
When converting to kmap_local_page() the base address does not need to be stored because any address within the page can be used in kunmap_local(). However, this was not clear from the documentation and cause some questions.[1] Document that any address in the page can be used in kunmap_local() to clarify this for future users. [1] https://lore.kernel.org/lkml/20211213154543.GM3538886@iweiny-DESK2.sc.intel.com/ [ira.weiny@intel.com: updates per Christoph] Link: https://lkml.kernel.org/r/20220124182138.816693-1-ira.weiny@intel.com Link: https://lkml.kernel.org/r/20220124013045.806718-1-ira.weiny@intel.com Signed-off-by: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem-internal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 0a0b2b09b1b8..a77be5630209 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -246,6 +246,16 @@ do { \ __kunmap_atomic(__addr); \ } while (0) +/** + * kunmap_local - Unmap a page mapped via kmap_local_page(). + * @__addr: An address within the page mapped + * + * @__addr can be any address within the mapped page. Commonly it is the + * address return from kmap_local_page(), but it can also include offsets. + * + * Unmapping should be done in the reverse order of the mapping. See + * kmap_local_page() for details. + */ #define kunmap_local(__addr) \ do { \ BUILD_BUG_ON(__same_type((__addr), struct page *)); \ -- cgit v1.2.3 From 436428255d5981e49ff015fc8e398ecf2ba10c24 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:48:37 -0700 Subject: mm/damon/core: move damon_set_targets() into dbgfs damon_set_targets() function is defined in the core for general use cases, but called from only dbgfs. Also, because the function is for general use cases, dbgfs does additional handling of pid type target id case. To make the situation simpler, this commit moves the function into dbgfs and makes it to do the pid type case handling on its own. Link: https://lkml.kernel.org/r/20211230100723.2238-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 5e1e3a128b77..bd021af5db3d 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -484,8 +484,6 @@ unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); -int damon_set_targets(struct damon_ctx *ctx, - unsigned long *ids, ssize_t nr_ids); int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg); -- cgit v1.2.3 From 1971bd630452e943380429336a851c55b027eed1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:48:40 -0700 Subject: mm/damon: remove the target id concept DAMON asks each monitoring target ('struct damon_target') to have one 'unsigned long' integer called 'id', which should be unique among the targets of same monitoring context. Meaning of it is, however, totally up to the monitoring primitives that registered to the monitoring context. For example, the virtual address spaces monitoring primitives treats the id as a 'struct pid' pointer. 
This makes the code flexible, but ugly, not well-documented, and type-unsafe[1]. Also, identification of each target can be done via its index. For this reason, this commit removes the concept and uses a clear type definition. For now, only a 'struct pid' pointer is used for the virtual address spaces monitoring. If DAMON is extended in the future so that we need to put another identifier field in the struct, we will use a union for such primitives-dependent fields and document which primitives are using which type. [1] https://lore.kernel.org/linux-mm/20211013154535.4aaeaaf9d0182922e405dd1e@linux-foundation.org/ Link: https://lkml.kernel.org/r/20211230100723.2238-5-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index bd021af5db3d..7c1d915b3587 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -60,19 +60,18 @@ struct damon_region { /** * struct damon_target - Represents a monitoring target. - * @id: Unique identifier for this target. + * @pid: The PID of the virtual address space to monitor. * @nr_regions: Number of monitoring target regions of this target. * @regions_list: Head of the monitoring target regions of this target. * @list: List head for siblings. * * Each monitoring context could have multiple targets. For example, a context * for virtual memory address spaces could have multiple target processes. The - * @id of each target should be unique among the targets of the context. For - * example, in the virtual address monitoring context, it could be a pidfd or - * an address of an mm_struct. + * @pid should be set for appropriate address space monitoring primitives + * including the virtual address spaces monitoring primitives. */ struct damon_target { - unsigned long id; + struct pid *pid; unsigned int nr_regions; struct list_head regions_list; struct list_head list; @@ -475,7 +474,7 @@ struct damos *damon_new_scheme( void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); -struct damon_target *damon_new_target(unsigned long id); +struct damon_target *damon_new_target(void); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); -- cgit v1.2.3 From f7d911c39cbbb88d625216a0cfd0517a3047c46e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:48:46 -0700 Subject: mm/damon: rename damon_primitives to damon_operations Patch series "Allow DAMON user code independent of monitoring primitives". In-kernel DAMON user code is required to configure the monitoring context (struct damon_ctx) with proper monitoring primitives (struct damon_primitive). This makes the user code dependent on all supporting monitoring primitives. For example, DAMON debugfs interface depends on both DAMON_VADDR and DAMON_PADDR, though some users have interest in only one use case. As more monitoring primitives are introduced, the problem will only get bigger. To minimize such unnecessary dependency, this patchset makes it possible for monitoring primitives to be registered by the implementing code and later dynamically searched and selected by the user code. 
In addition to that, this patchset renames monitoring primitives to monitoring operations, which is more easy to intuitively understand what it means and how it would be structed. This patch (of 8): DAMON has a set of callback functions called monitoring primitives and let it can be configured with various implementations for easy extension for different address spaces and usages. However, the word 'primitive' is not so explicit. Meanwhile, many other structs resembles similar purpose calls themselves 'operations'. To make the code easier to be understood, this commit renames 'damon_primitives' to 'damon_operations' before it is too late to rename. Link: https://lkml.kernel.org/r/20220215184603.1479-1-sj@kernel.org Link: https://lkml.kernel.org/r/20220215184603.1479-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Xin Hao Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 7c1d915b3587..00baeb42c18e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -67,8 +67,8 @@ struct damon_region { * * Each monitoring context could have multiple targets. For example, a context * for virtual memory address spaces could have multiple target processes. The - * @pid should be set for appropriate address space monitoring primitives - * including the virtual address spaces monitoring primitives. + * @pid should be set for appropriate &struct damon_operations including the + * virtual address spaces monitoring operations. */ struct damon_target { struct pid *pid; @@ -120,9 +120,9 @@ enum damos_action { * uses smaller one as the effective quota. * * For selecting regions within the quota, DAMON prioritizes current scheme's - * target memory regions using the &struct damon_primitive->get_scheme_score. + * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, - * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * &weight_nr_accesses, and &weight_age, because monitoring operations are * encouraged to respect those. */ struct damos_quota { @@ -256,10 +256,10 @@ struct damos { struct damon_ctx; /** - * struct damon_primitive - Monitoring primitives for given use cases. + * struct damon_operations - Monitoring operations for given use cases. * - * @init: Initialize primitive-internal data structures. - * @update: Update primitive-internal data structures. + * @init: Initialize operations-related data structures. + * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. * @reset_aggregated: Reset aggregated accesses monitoring results. @@ -269,18 +269,18 @@ struct damon_ctx; * @cleanup: Clean up the context. * * DAMON can be extended for various address spaces and usages. For this, - * users should register the low level primitives for their target address - * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread + * users should register the low level operations for their target address + * space and usecase via the &damon_ctx.ops. 
Then, the monitoring thread * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting - * the monitoring, @update after each &damon_ctx.primitive_update_interval, and + * the monitoring, @update after each &damon_ctx.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each * &damon_ctx.aggr_interval. * - * @init should initialize primitive-internal data structures. For example, + * @init should initialize operations-related data structures. For example, * this could be used to construct proper monitoring target regions and link * those to @damon_ctx.adaptive_targets. - * @update should update the primitive-internal data structures. For example, + * @update should update the operations-related data structures. For example, * this could be used to update monitoring target regions for current status. * @prepare_access_checks should manipulate the monitoring regions to be * prepared for the next access check. @@ -300,7 +300,7 @@ struct damon_ctx; * monitoring. * @cleanup is called from @kdamond just before its termination. */ -struct damon_primitive { +struct damon_operations { void (*init)(struct damon_ctx *context); void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); @@ -354,15 +354,15 @@ struct damon_callback { * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. - * @primitive_update_interval: The time between monitoring primitive updates. + * @ops_update_interval: The time between monitoring operations updates. * * For each @sample_interval, DAMON checks whether each region is accessed or * not. It aggregates and keeps the access information (number of accesses to * each region) for @aggr_interval time. DAMON also checks whether the target * memory regions need update (e.g., by ``mmap()`` calls from the application, * in case of virtual memory monitoring) and applies the changes for each - * @primitive_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_primitive and &struct damon_callback for more + * @ops_update_interval. All time intervals are in micro-seconds. + * Please refer to &struct damon_operations and &struct damon_callback for more * detail. * * @kdamond: Kernel thread who does the monitoring. @@ -374,7 +374,7 @@ struct damon_callback { * * Once started, the monitoring thread runs until explicitly required to be * terminated or every monitoring target is invalid. The validity of the - * targets is checked via the &damon_primitive.target_valid of @primitive. The + * targets is checked via the &damon_operations.target_valid of @ops. The * termination can also be explicitly requested by writing non-zero to * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. * Therefore, users can know whether the monitoring is ongoing or terminated by @@ -384,7 +384,7 @@ struct damon_callback { * Note that the monitoring thread protects only @kdamond and @kdamond_stop via * @kdamond_lock. Accesses to other fields must be protected by themselves. * - * @primitive: Set of monitoring primitives for given use cases. + * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. * * @min_nr_regions: The minimum number of adaptive monitoring regions. 
@@ -395,17 +395,17 @@ struct damon_callback { struct damon_ctx { unsigned long sample_interval; unsigned long aggr_interval; - unsigned long primitive_update_interval; + unsigned long ops_update_interval; /* private: internal use only */ struct timespec64 last_aggregation; - struct timespec64 last_primitive_update; + struct timespec64 last_ops_update; /* public: */ struct task_struct *kdamond; struct mutex kdamond_lock; - struct damon_primitive primitive; + struct damon_operations ops; struct damon_callback callback; unsigned long min_nr_regions; @@ -484,7 +484,7 @@ unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, - unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long aggr_int, unsigned long ops_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg); int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); @@ -497,12 +497,12 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #ifdef CONFIG_DAMON_VADDR bool damon_va_target_valid(void *t); -void damon_va_set_primitives(struct damon_ctx *ctx); +void damon_va_set_operations(struct damon_ctx *ctx); #endif /* CONFIG_DAMON_VADDR */ #ifdef CONFIG_DAMON_PADDR bool damon_pa_target_valid(void *t); -void damon_pa_set_primitives(struct damon_ctx *ctx); +void damon_pa_set_operations(struct damon_ctx *ctx); #endif /* CONFIG_DAMON_PADDR */ #endif /* _DAMON_H */ -- cgit v1.2.3 From 9f7b053a0f6121f89e00d1688bfca0bf278caa25 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:48:49 -0700 Subject: mm/damon: let monitoring operations can be registered and selected In-kernel DAMON user code like DAMON debugfs interface should set 'struct damon_operations' of its 'struct damon_ctx' on its own. Therefore, the client code should depend on all supporting monitoring operations implementations that it could use. For example, DAMON debugfs interface depends on both vaddr and paddr, while some of the users are not always interested in both. To minimize such unnecessary dependencies, this commit makes the monitoring operations can be registered by implementing code and then dynamically selected by the user code without build-time dependency. Link: https://lkml.kernel.org/r/20220215184603.1479-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: David Rientjes Cc: Xin Hao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 00baeb42c18e..076da277b249 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -253,11 +253,24 @@ struct damos { struct list_head list; }; +/** + * enum damon_ops_id - Identifier for each monitoring operations implementation + * + * @DAMON_OPS_VADDR: Monitoring operations for virtual address spaces + * @DAMON_OPS_PADDR: Monitoring operations for the physical address space + */ +enum damon_ops_id { + DAMON_OPS_VADDR, + DAMON_OPS_PADDR, + NR_DAMON_OPS, +}; + struct damon_ctx; /** * struct damon_operations - Monitoring operations for given use cases. * + * @id: Identifier of this operations set. * @init: Initialize operations-related data structures. * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. 
@@ -277,6 +290,8 @@ struct damon_ctx; * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each * &damon_ctx.aggr_interval. * + * Each &struct damon_operations instance having valid @id can be registered + * via damon_register_ops() and selected by damon_select_ops() later. * @init should initialize operations-related data structures. For example, * this could be used to construct proper monitoring target regions and link * those to @damon_ctx.adaptive_targets. @@ -489,6 +505,8 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); +int damon_register_ops(struct damon_operations *ops); +int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id); int damon_start(struct damon_ctx **ctxs, int nr_ctxs); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); -- cgit v1.2.3 From 851040566a008f7248cb754d5bb9a3e34f2effe5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:49:07 -0700 Subject: mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}() Because DAMON debugfs interface and DAMON-based proactive reclaim are now using monitoring operations via the registration mechanism, the damon_{p,v}a_{target_valid,set_operations}() functions have no user. This commit cleans them up. Link: https://lkml.kernel.org/r/20220215184603.1479-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: David Rientjes Cc: Xin Hao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 076da277b249..49c4a11ecf20 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -513,14 +513,4 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ -#ifdef CONFIG_DAMON_VADDR -bool damon_va_target_valid(void *t); -void damon_va_set_operations(struct damon_ctx *ctx); -#endif /* CONFIG_DAMON_VADDR */ - -#ifdef CONFIG_DAMON_PADDR -bool damon_pa_target_valid(void *t); -void damon_pa_set_operations(struct damon_ctx *ctx); -#endif /* CONFIG_DAMON_PADDR */ - #endif /* _DAMON_H */ -- cgit v1.2.3 From 8b9b0d335a345cbc590baa5aa8aa02c467c1e4e8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:49:21 -0700 Subject: mm/damon/core: allow non-exclusive DAMON start/stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Introduce DAMON sysfs interface", v3. Introduction ============ DAMON's debugfs-based user interface (DAMON_DBGFS) served very well, so far. However, it unnecessarily depends on debugfs, while DAMON is not aimed to be used only for debugging. Also, the interface receives multiple values via one file. For example, the schemes file receives 18 values. As a result, it is inefficient, hard to use, and difficult to extend. Especially, keeping backward compatibility of user space tools is only getting more challenging. It would be better to implement another reliable and flexible interface and deprecate DAMON_DBGFS in the long term. 
For the reason, this patchset introduces a sysfs-based new user interface of DAMON. The idea of the new interface is, using directory hierarchies and having one dedicated file for each value. For a short example, users can do the virtual address monitoring via the interface as below: # cd /sys/kernel/mm/damon/admin/ # echo 1 > kdamonds/nr_kdamonds # echo 1 > kdamonds/0/contexts/nr_contexts # echo vaddr > kdamonds/0/contexts/0/operations # echo 1 > kdamonds/0/contexts/0/targets/nr_targets # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid_target # echo on > kdamonds/0/state A brief representation of the files hierarchy of DAMON sysfs interface is as below. Childs are represented with indentation, directories are having '/' suffix, and files in each directory are separated by comma. /sys/kernel/mm/damon/admin │ kdamonds/nr_kdamonds │ │ 0/state,pid │ │ │ contexts/nr_contexts │ │ │ │ 0/operations │ │ │ │ │ monitoring_attrs/ │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us │ │ │ │ │ │ nr_regions/min,max │ │ │ │ │ targets/nr_targets │ │ │ │ │ │ 0/pid_target │ │ │ │ │ │ │ regions/nr_regions │ │ │ │ │ │ │ │ 0/start,end │ │ │ │ │ │ │ │ ... │ │ │ │ │ │ ... │ │ │ │ │ schemes/nr_schemes │ │ │ │ │ │ 0/action │ │ │ │ │ │ │ access_pattern/ │ │ │ │ │ │ │ │ sz/min,max │ │ │ │ │ │ │ │ nr_accesses/min,max │ │ │ │ │ │ │ │ age/min,max │ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds │ │ │ │ │ │ ... │ │ │ │ ... │ │ ... Detailed usage of the files will be described in the final Documentation patch of this patchset. Main Difference Between DAMON_DBGFS and DAMON_SYSFS --------------------------------------------------- At the moment, DAMON_DBGFS and DAMON_SYSFS provides same features. One important difference between them is their exclusiveness. DAMON_DBGFS works in an exclusive manner, so that no DAMON worker thread (kdamond) in the system can run concurrently and interfere somehow. For the reason, DAMON_DBGFS asks users to construct all monitoring contexts and start them at once. It's not a big problem but makes the operation a little bit complex and unflexible. For more flexible usage, DAMON_SYSFS moves the responsibility of preventing any possible interference to the admins and work in a non-exclusive manner. That is, users can configure and start contexts one by one. Note that DAMON respects both exclusive groups and non-exclusive groups of contexts, in a manner similar to that of reader-writer locks. That is, if any exclusive monitoring contexts (e.g., contexts that started via DAMON_DBGFS) are running, DAMON_SYSFS does not start new contexts, and vice versa. Future Plan of DAMON_DBGFS Deprecation ====================================== Once this patchset is merged, DAMON_DBGFS development will be frozen. That is, we will maintain it to work as is now so that no users will be break. But, it will not be extended to provide any new feature of DAMON. The support will be continued only until next LTS release. After that, we will drop DAMON_DBGFS. User-space Tooling Compatibility -------------------------------- As DAMON_SYSFS provides all features of DAMON_DBGFS, all user space tooling can move to DAMON_SYSFS. As we will continue supporting DAMON_DBGFS until next LTS kernel release, user space tools would have enough time to move to DAMON_SYSFS. 
The official user space tool, damo[1], is already supporting both DAMON_SYSFS and DAMON_DBGFS. Both correctness tests[2] and performance tests[3] of DAMON using DAMON_SYSFS also passed. [1] https://github.com/awslabs/damo [2] https://github.com/awslabs/damon-tests/tree/master/corr [3] https://github.com/awslabs/damon-tests/tree/master/perf Sequence of Patches =================== First two patches (patches 1-2) make core changes for DAMON_SYSFS. The first one (patch 1) allows non-exclusive DAMON contexts so that DAMON_SYSFS can work in non-exclusive mode, while the second one (patch 2) adds the size of DAMON enum types so that DAMON API users can safely iterate the enums. Third patch (patch 3) implements a basic sysfs stub for virtual address spaces monitoring. Note that this implements only sysfs files and DAMON is not linked. Fourth patch (patch 4) links the DAMON_SYSFS to DAMON so that users can control DAMON using the sysfs files. Following six patches (patches 5-10) implement other DAMON features that DAMON_DBGFS supports one by one (physical address space monitoring, DAMON-based operation schemes, schemes quotas, schemes prioritization weights, schemes watermarks, and schemes stats). Following patch (patch 11) adds a simple selftest for DAMON_SYSFS, and the final one (patch 12) documents DAMON_SYSFS. This patch (of 13): To avoid interference between DAMON contexts monitoring overlapping memory regions, damon_start() works in an exclusive manner. That is, damon_start() does nothing but fails if any context that was started by another instance of the function is still running. This makes its usage a little bit restrictive. However, admins could be aware of each DAMON usage and address such interferences on their own in some cases. This commit hence implements a non-exclusive mode of the function and allows the callers to select the mode. Note that the exclusive groups and non-exclusive groups of contexts will respect each other in a manner similar to that of reader-writer locks. Therefore, this commit will not cause any behavioral change to the exclusive groups. Link: https://lkml.kernel.org/r/20220228081314.5770-1-sj@kernel.org Link: https://lkml.kernel.org/r/20220228081314.5770-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Cc: David Rientjes Cc: Xin Hao Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 49c4a11ecf20..f8e99e47d747 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -508,7 +508,7 @@ int damon_nr_running_ctxs(void); int damon_register_ops(struct damon_operations *ops); int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id); -int damon_start(struct damon_ctx **ctxs, int nr_ctxs); +int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ -- cgit v1.2.3 From 5257f36ec289d544532f2889cbed11abbb06cf0c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Mar 2022 14:49:24 -0700 Subject: mm/damon/core: add number of each enum type values This commit declares the number of legal values for each DAMON enum type to make traversals of such DAMON enum types easy and safe. 
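For illustration only (not part of the patch), an interface that accepts an action value from user space could use the new terminator to validate it before use; the helper name below is made up:

  /* Illustrative helper: NR_DAMOS_ACTIONS terminates enum damos_action. */
  static bool damos_action_valid(unsigned long action)
  {
          return action < NR_DAMOS_ACTIONS;
  }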
Link: https://lkml.kernel.org/r/20220228081314.5770-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Shuah Khan Cc: Xin Hao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index f8e99e47d747..f23cbfa4248d 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -87,6 +87,7 @@ struct damon_target { * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_STAT: Do nothing but count the stat. + * @NR_DAMOS_ACTIONS: Total number of DAMOS actions */ enum damos_action { DAMOS_WILLNEED, @@ -95,6 +96,7 @@ enum damos_action { DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, DAMOS_STAT, /* Do nothing but only record the stat */ + NR_DAMOS_ACTIONS, }; /** @@ -157,10 +159,12 @@ struct damos_quota { * * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + * @NR_DAMOS_WMARK_METRICS: Total number of DAMOS watermark metrics */ enum damos_wmark_metric { DAMOS_WMARK_NONE, DAMOS_WMARK_FREE_MEM_RATE, + NR_DAMOS_WMARK_METRICS, }; /** -- cgit v1.2.3 From 2af7e566a8616c278e1d7287ce86cd3900bed943 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 22 Mar 2022 10:22:24 -0700 Subject: net/mlx5e: Fix build warning, detected write beyond size of field When merged with Linus' tree, the cited patch below will cause the following build warning: In function 'fortify_memset_chk', inlined from 'mlx5e_xmit_xdp_frame' at drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c:438:3: include/linux/fortify-string.h:242:25: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] 242 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix that by grouping the fields to memset in struct_group() to avoid the false alarm. Fixes: 9ded70fa1d81 ("net/mlx5e: Don't prefill WQEs in XDP SQ in the multi buffer mode") Reported-by: Stephen Rothwell Suggested-by: Stephen Rothwell Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20220322172224.31849-1-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/qp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 61e48d459b23..8bda3ba5b109 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -202,6 +202,9 @@ struct mlx5_wqe_fmr_seg { struct mlx5_wqe_ctrl_seg { __be32 opmod_idx_opcode; __be32 qpn_ds; + + struct_group(trailer, + u8 signature; u8 rsvd[2]; u8 fm_ce_se; @@ -211,6 +214,8 @@ struct mlx5_wqe_ctrl_seg { __be32 umr_mkey; __be32 tis_tir_num; }; + + ); /* end of trailer group */ }; #define MLX5_WQE_CTRL_DS_MASK 0x3f -- cgit v1.2.3 From d578c770c85233af592e54537f93f3831bde7e9a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 23 Mar 2022 09:13:08 +0800 Subject: block: avoid calling blkg_free() in atomic context blkg_free() can currently be called in atomic context, either with a spin lock held or from an rcu callback. Meanwhile, either the request queue's release handler or ->pd_free_fn can sleep. Fix the issue by scheduling a work function for freeing the blkcg_gq instance. 
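As an illustration of the deferred-free pattern described above, a minimal sketch follows; it assumes the free_work member added to struct blkcg_gq in the diff below, and the function bodies are simplified, not the exact code in block/blk-cgroup.c:

  static void blkg_free_workfn(struct work_struct *work)
  {
          struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, free_work);

          /* Process context: sleeping cleanup (->pd_free_fn, queue release) is fine here. */
          kfree(blkg);
  }

  static void blkg_free(struct blkcg_gq *blkg)
  {
          /* May run under a spin lock or from an RCU callback, so defer the real work. */
          INIT_WORK(&blkg->free_work, blkg_free_workfn);
          schedule_work(&blkg->free_work);
  }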
[ 148.553894] BUG: sleeping function called from invalid context at block/blk-sysfs.c:767 [ 148.557381] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/13 [ 148.560741] preempt_count: 101, expected: 0 [ 148.562577] RCU nest depth: 0, expected: 0 [ 148.564379] 1 lock held by swapper/13/0: [ 148.566127] #0: ffffffff82615f80 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x0/0x1b [ 148.569640] Preemption disabled at: [ 148.569642] [] ___slab_alloc+0x554/0x661 [ 148.573559] CPU: 13 PID: 0 Comm: swapper/13 Kdump: loaded Not tainted 5.17.0_up+ #110 [ 148.576834] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1.fc33 04/01/2014 [ 148.579768] Call Trace: [ 148.580567] [ 148.581262] dump_stack_lvl+0x56/0x7c [ 148.582367] ? ___slab_alloc+0x554/0x661 [ 148.583526] __might_resched+0x1af/0x1c8 [ 148.584678] blk_release_queue+0x24/0x109 [ 148.585861] kobject_cleanup+0xc9/0xfe [ 148.586979] blkg_free+0x46/0x63 [ 148.587962] rcu_do_batch+0x1c5/0x3db [ 148.589057] rcu_core+0x14a/0x184 [ 148.590065] __do_softirq+0x14d/0x2c7 [ 148.591167] __irq_exit_rcu+0x7a/0xd4 [ 148.592264] sysvec_apic_timer_interrupt+0x82/0xa5 [ 148.593649] [ 148.594354] [ 148.595058] asm_sysvec_apic_timer_interrupt+0x12/0x20 Cc: Tejun Heo Fixes: 0a9a25ca7843 ("block: let blkcg_gq grab request queue's refcnt") Reported-by: Christoph Hellwig Link: https://lore.kernel.org/linux-block/20220322093322.GA27283@lst.de/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220323011308.2010380-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f2ad8ed8f777..652cd05b0924 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -95,7 +95,10 @@ struct blkcg_gq { spinlock_t async_bio_lock; struct bio_list async_bios; - struct work_struct async_bio_work; + union { + struct work_struct async_bio_work; + struct work_struct free_work; + }; atomic_t use_delay; atomic64_t delay_nsec; -- cgit v1.2.3 From 553f685ebf9689e701ec6b95c0ca3595cf1a743f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 11 Mar 2022 20:55:18 +0800 Subject: mfd: db8500-prcmu: Remove unused inline function commit b0e846248de5 ("mfd: db8500-prcmu: Remove dead code for a non-existing config") left behind this, remove it. Signed-off-by: YueHaibing Reviewed-by: Linus Walleij Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220311125518.31064-1-yuehaibing@huawei.com --- include/linux/mfd/dbx500-prcmu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index 2a255362b5ac..e7a7e70fdb38 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -561,10 +561,6 @@ static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) return 0; } -static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {} - -static inline void prcmu_qos_force_opp(int prcmu_qos_class, s32 i) {} - static inline int prcmu_qos_requirement(int prcmu_qos_class) { return 0; -- cgit v1.2.3 From 30d024b5058e0433914022f87d917a97a9527632 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 23 Mar 2022 15:35:10 +1200 Subject: cacheflush.h: Add forward declaration for struct folio The struct folio is not declared in cacheflush.h so we need to provide a forward declaration as otherwise users of this header file may get warnings. 
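For context, a forward declaration is sufficient here because the header only refers to the type through pointers, along the lines of:

  struct folio;                                   /* incomplete type is enough */
  void flush_dcache_folio(struct folio *folio);   /* used only via a pointer */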
Reported-by: Guenter Roeck Fixes: 522a0032af00 ("Add linux/cacheflush.h") Signed-off-by: Herbert Xu Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds --- include/linux/cacheflush.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h index fef8b607f97e..a6189d21f2ba 100644 --- a/include/linux/cacheflush.h +++ b/include/linux/cacheflush.h @@ -4,6 +4,8 @@ #include +struct folio; + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO void flush_dcache_folio(struct folio *folio); -- cgit v1.2.3 From 054d5575cd6ed2792611a7cbb8c88663cc873780 Mon Sep 17 00:00:00 2001 From: Louis Peens Date: Wed, 23 Mar 2022 11:25:06 +0200 Subject: net/sched: fix incorrect vlan_push_eth dest field Seems like a potential copy-paste bug slipped in here, the second memcpy should of course be populating src and not dest. Fixes: ab95465cde23 ("net/sched: add vlan push_eth and pop_eth action to the hardware IR") Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20220323092506.21639-1-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index a97600f742de..904eddfc1826 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -84,7 +84,7 @@ static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest, { rcu_read_lock(); memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN); - memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); + memcpy(src, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); rcu_read_unlock(); } -- cgit v1.2.3 From d91612d7f01aca454469976d25db761c5085ae4d Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 2 Feb 2022 20:17:35 -0600 Subject: clk: sunxi-ng: Add support for the sun6i RTC clocks The RTC power domain in sun6i and newer SoCs manages the 16 MHz RC oscillator (called "IOSC" or "osc16M") and the optional 32 kHz crystal oscillator (called "LOSC" or "osc32k"). Starting with the H6, this power domain also handles the 24 MHz DCXO (called variously "HOSC", "dcxo24M", or "osc24M") as well. The H6 also adds a calibration circuit for IOSC. Later SoCs introduce further variations on the design: - H616 adds an additional mux for the 32 kHz fanout source. - R329 adds an additional mux for the RTC timekeeping clock, a clock for the SPI bus between power domains inside the RTC, and removes the IOSC calibration functionality. Take advantage of the CCU framework to handle this increased complexity. This driver is intended to be a drop-in replacement for the existing RTC clock provider. So some runtime adjustment of the clock parents is needed, both to handle hardware differences, and to support the old binding which omitted some of the input clocks. 
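For illustration, a hypothetical call site in the RTC driver probe could look like the sketch below; the resource handling is an assumption, only the sun6i_rtc_ccu_probe() signature comes from the header change that follows:

  static int sun6i_rtc_probe(struct platform_device *pdev)
  {
          void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

          if (IS_ERR(base))
                  return PTR_ERR(base);

          /* Hand the shared RTC register block to the CCU sub-driver. */
          return sun6i_rtc_ccu_probe(&pdev->dev, base);
  }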
Signed-off-by: Samuel Holland Acked-by: Stephen Boyd Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220203021736.13434-6-samuel@sholland.org --- include/linux/clk/sunxi-ng.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/clk/sunxi-ng.h b/include/linux/clk/sunxi-ng.h index cf32123b39f5..57c8ec44ab4e 100644 --- a/include/linux/clk/sunxi-ng.h +++ b/include/linux/clk/sunxi-ng.h @@ -9,4 +9,6 @@ int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode); int sunxi_ccu_get_mmc_timing_mode(struct clk *clk); +int sun6i_rtc_ccu_probe(struct device *dev, void __iomem *reg); + #endif -- cgit v1.2.3 From 5c0a04a663019dd14cb000d370cff0ced6df7ef1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 9 Mar 2022 17:22:33 +0100 Subject: rtc: ds1685: drop no_irq No platforms are currently setting no_irq. Anyway, letting platform_get_irq fail is fine as this means that there is no IRQ. In that case, clear RTC_FEATURE_ALARM so the core knows there are no alarms. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220309162301.61679-2-alexandre.belloni@bootlin.com --- include/linux/rtc/ds1685.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 67ee9d20cc5a..5a41c3bbcbe3 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -46,7 +46,6 @@ struct ds1685_priv { u32 regstep; int irq_num; bool bcd_mode; - bool no_irq; u8 (*read)(struct ds1685_priv *, int); void (*write)(struct ds1685_priv *, int, u8); void (*prepare_poweroff)(void); -- cgit v1.2.3 From e99653afeb9585350644c5ae4b0ca987cbe8d053 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 9 Mar 2022 17:22:53 +0100 Subject: rtc: add new RTC_FEATURE_ALARM_WAKEUP_ONLY feature Some RTCs have an IRQ pin that is not connected to a CPU interrupt but rather directly to a PMIC or power supply. In that case, it is still useful to be able to set alarms but we shouldn't expect interrupts. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220309162301.61679-22-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 03e5b776e597..97aca4503a6a 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -133,7 +133,8 @@ struct rtc_param { #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CORRECTION 5 #define RTC_FEATURE_BACKUP_SWITCH_MODE 6 -#define RTC_FEATURE_CNT 7 +#define RTC_FEATURE_ALARM_WAKEUP_ONLY 7 +#define RTC_FEATURE_CNT 8 /* parameter list */ #define RTC_PARAM_FEATURES 0 -- cgit v1.2.3 From 1a31d63632553a54af6c0c3c5b5930e931a94ee4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 9 Mar 2022 17:23:00 +0100 Subject: rtc: remove uie_unsupported uie_unsupported is not used by any drivers anymore, remove it. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220309162301.61679-29-alexandre.belloni@bootlin.com --- include/linux/rtc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 47fd1c2d3a57..1fd9c6a21ebe 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -110,8 +110,6 @@ struct rtc_device { struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */ int pie_enabled; struct work_struct irqwork; - /* Some hardware can't support UIE mode */ - int uie_unsupported; /* * This offset specifies the update timing of the RTC. -- cgit v1.2.3 From de7a9e949f4f094741f708cd05572f932d009d02 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 23 Mar 2022 22:15:49 +0530 Subject: drivers/nvdimm: Fix build failure when CONFIG_PERF_EVENTS is not set The following build failure occurs when CONFIG_PERF_EVENTS is not set as generic pmu functions are not visible in that scenario. |-- s390-randconfig-r044-20220313 | |-- nd_perf.c:(.text):undefined-reference-to-perf_pmu_migrate_context | |-- nd_perf.c:(.text):undefined-reference-to-perf_pmu_register | `-- nd_perf.c:(.text):undefined-reference-to-perf_pmu_unregister Similar build failure in nds32 architecture: nd_perf.c:(.text+0x21e): undefined reference to `perf_pmu_migrate_context' nd_perf.c:(.text+0x434): undefined reference to `perf_pmu_register' nd_perf.c:(.text+0x57c): undefined reference to `perf_pmu_unregister' Fix this issue by adding check for CONFIG_PERF_EVENTS config option and disabling the nvdimm perf interface incase this config is not set. Also remove function declaration of perf_pmu_migrate_context, perf_pmu_register, perf_pmu_unregister functions from nd.h as these are common pmu functions which are part of perf_event.h and since we are disabling nvdimm perf interface incase CONFIG_PERF_EVENTS option is not set, we not need to declare them in nd.h Also move the platform_device header file addition part from nd.h to nd_perf.c and add stub functions for register_nvdimm_pmu and unregister_nvdimm_pmu functions to handle CONFIG_PERF_EVENTS=n case. 
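A minimal caller-side sketch (hypothetical function name) showing how the new stubs keep users free of #ifdefs: the registration helper can be called unconditionally and its error treated as "no perf interface available".

	static void example_setup_nvdimm_pmu(struct nvdimm_pmu *nd_pmu,
					     struct platform_device *pdev)
	{
		int rc = register_nvdimm_pmu(nd_pmu, pdev);

		if (rc)	/* stub returns an error when CONFIG_PERF_EVENTS=n */
			dev_dbg(&pdev->dev, "nvdimm perf interface unavailable: %d\n", rc);
	}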
Fixes: 0fab1ba6ad6b ("drivers/nvdimm: Add perf interface to expose nvdimm performance stats") (Commit id based on libnvdimm-for-next tree) Signed-off-by: Kajol Jain Link: https://lore.kernel.org/all/62317124.YBQFU33+s%2FwdvWGj%25lkp@intel.com/ Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220323164550.109768-1-kjain@linux.ibm.com Signed-off-by: Dan Williams --- include/linux/nd.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index 7b2ccbdc1cbc..b9771ba1ef87 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -9,7 +9,6 @@ #include #include #include -#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -57,15 +56,24 @@ struct nvdimm_pmu { struct cpumask arch_cpumask; }; +struct platform_device; + +#ifdef CONFIG_PERF_EVENTS extern ssize_t nvdimm_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); int register_nvdimm_pmu(struct nvdimm_pmu *nvdimm, struct platform_device *pdev); void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu); -void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_pmu_register(struct pmu *pmu, const char *name, int type); -void perf_pmu_unregister(struct pmu *pmu); + +#else +static inline int register_nvdimm_pmu(struct nvdimm_pmu *nvdimm, struct platform_device *pdev) +{ + return -ENXIO; +} + +static inline void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu) { } +#endif struct nd_device_driver { struct device_driver drv; -- cgit v1.2.3 From c724c866bb70cb8c607081a26823a1f0ebde4387 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:29 -0700 Subject: linux/types.h: remove unnecessary __bitwise__ There are no users of "__bitwise__" except the definition of "__bitwise". Remove __bitwise__ and define __bitwise directly. This is a follow-up to 05de97003c77 ("linux/types.h: enable endian checks for all sparse builds"). [akpm@linux-foundation.org: change the tools/include/linux/types.h definition also] Link: https://lkml.kernel.org/r/20220310220927.245704-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index f6d2f83cbe29..71696f424ac8 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -20,11 +20,10 @@ */ #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; -- cgit v1.2.3 From 179fd6ba3bacbf7b19cbdf9b14be109d54318394 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:32 -0700 Subject: Documentation/sparse: add hints about __CHECKER__ Several attributes depend on __CHECKER__, but previously there was no clue in the tree about when __CHECKER__ might be defined. Add hints at the most common places (__kernel, __user, __iomem, __bitwise) and in the sparse documentation. Link: https://lkml.kernel.org/r/20220310220927.245704-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Michael S . 
Tsirkin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 1 + include/uapi/linux/types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..4b3915ce38a4 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 71696f424ac8..c4dc597f3dcf 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -19,6 +19,7 @@ * any application/library that wants linux/types.h. */ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else -- cgit v1.2.3 From 14e83077d55ff4b88fe39f5e98fb8230c2ccb4fb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Mar 2022 16:05:41 -0700 Subject: include: drop pointless __compiler_offsetof indirection (1) compiler_types.h is unconditionally included via an -include flag (see scripts/Makefile.lib), and it defines __compiler_offsetof unconditionally. So testing for definedness of __compiler_offsetof is mostly pointless. (2) Every relevant compiler provides __builtin_offsetof (even sparse has had that for 14 years), and if for whatever reason one would end up picking up the poor man's fallback definition (C file compiler with completely custom CFLAGS?), newer clang versions won't treat the result as an Integer Constant Expression, so if used in place where such is required (static initializer or static_assert), one would get errors like t.c:11:16: error: static_assert expression is not an integral constant expression t.c:11:16: note: cast that performs the conversions of a reinterpret_cast is not allowed in a constant expression t.c:4:33: note: expanded from macro 'offsetof' #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) So just define offsetof unconditionally and directly in terms of __builtin_offsetof. Link: https://lkml.kernel.org/r/20220202102147.326672-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Acked-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 2 -- include/linux/stddef.h | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4b3915ce38a4..232dbd97f8b1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -138,8 +138,6 @@ struct ftrace_likely_data { */ #define __naked __attribute__((__naked__)) notrace -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - /* * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. 
This makes extern inline behave as per gnu89 diff --git a/include/linux/stddef.h b/include/linux/stddef.h index ca507bd5f808..929d67710cc5 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -13,11 +13,7 @@ enum { }; #undef offsetof -#ifdef __compiler_offsetof -#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) -#else -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) /** * sizeof_field() - Report the size of a struct field in bytes -- cgit v1.2.3 From f334f5668bedf7307f6df1d98b14f55902931926 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Mar 2022 16:05:44 -0700 Subject: ilog2: force inlining of __ilog2_u32() and __ilog2_u64() Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to __ilog2_u32() being duplicated 50 times and __ilog2_u64() 3 times in vmlinux on a tiny powerpc32 config. __ilog2_u32() being 2 instructions it is not worth being kept out of line, so force inlining. Allthough the u64 version is a bit bigger, there is still a small benefit in keeping it inlined. On a 64 bits config there's a real benefit. With this change the size of vmlinux text is reduced by 1 kbytes, which is approx 50% more than the size of the removed functions. Before the patch there is for instance: c00d2a94 <__ilog2_u32>: c00d2a94: 7c 63 00 34 cntlzw r3,r3 c00d2a98: 20 63 00 1f subfic r3,r3,31 c00d2a9c: 4e 80 00 20 blr c00d36d8 <__order_base_2>: c00d36d8: 28 03 00 01 cmplwi r3,1 c00d36dc: 40 81 00 2c ble c00d3708 <__order_base_2+0x30> c00d36e0: 94 21 ff f0 stwu r1,-16(r1) c00d36e4: 7c 08 02 a6 mflr r0 c00d36e8: 38 63 ff ff addi r3,r3,-1 c00d36ec: 90 01 00 14 stw r0,20(r1) c00d36f0: 4b ff f3 a5 bl c00d2a94 <__ilog2_u32> c00d36f4: 80 01 00 14 lwz r0,20(r1) c00d36f8: 38 63 00 01 addi r3,r3,1 c00d36fc: 7c 08 03 a6 mtlr r0 c00d3700: 38 21 00 10 addi r1,r1,16 c00d3704: 4e 80 00 20 blr c00d3708: 38 60 00 00 li r3,0 c00d370c: 4e 80 00 20 blr With the patch it has become: c00d356c <__order_base_2>: c00d356c: 28 03 00 01 cmplwi r3,1 c00d3570: 40 81 00 14 ble c00d3584 <__order_base_2+0x18> c00d3574: 38 63 ff ff addi r3,r3,-1 c00d3578: 7c 63 00 34 cntlzw r3,r3 c00d357c: 20 63 00 20 subfic r3,r3,32 c00d3580: 4e 80 00 20 blr c00d3584: 38 60 00 00 li r3,0 c00d3588: 4e 80 00 20 blr No more need for __order_base_2() to setup a stack frame and save/restore caller address. And the following 'add 1' is merged in the subtract. Another typical use of it: c080ff28 : ... c080fff8: 7f c3 f3 78 mr r3,r30 c080fffc: 4b 8f 81 f1 bl c01081ec <__ilog2_u32> c0810000: 38 63 ff f2 addi r3,r3,-14 ... Becomes c080ff1c : ... c080ffec: 7f c3 00 34 cntlzw r3,r30 c080fff0: 20 63 00 11 subfic r3,r3,17 ... Here no need to move r30 argument to r3 then substract 14 to result. Just work on r30 and merge the 'sub 14' with the 'sub from 31'. 
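For reference, a minimal sketch (mirroring the existing generic fallback, not new functionality) of what the helper reduces to and how a typical caller looks once inlining is forced:

	static __always_inline int example_ilog2_u32(u32 n)
	{
		return fls(n) - 1;	/* one count-leading-zeros + a subtract */
	}

	/* e.g. rounding an allocation count up to a power-of-two order */
	static inline unsigned int example_order(unsigned int nr_pages)
	{
		return order_base_2(nr_pages);	/* now fully inline, no call/ret */
	}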
Link: https://lkml.kernel.org/r/803a2ac3d923ebcfd0dd40f5886b05cae7bb0aba.1644243860.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/log2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/log2.h b/include/linux/log2.h index df0b155c2141..9f30d087a128 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -18,7 +18,7 @@ * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; @@ -26,7 +26,7 @@ int __ilog2_u32(u32 n) #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; -- cgit v1.2.3 From 25cb5b7ac6a7f7943e22a6831b74a6aa964d2a0c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Mar 2022 16:05:47 -0700 Subject: bitfield: add explicit inclusions to the example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not obvious that bitfield.h doesn't guarantee the bits.h inclusion and the example in the former is confusing. Some developers think that it's okay to just include bitfield.h to get it working. Change example to explicitly include necessary headers in order to avoid confusion. Link: https://lkml.kernel.org/r/20220207123341.47533-1-andriy.shevchenko@linux.intel.com Fixes: 3e9b3112ec74 ("add basic register-field manipulation macros") Depends-on: 8bd9cb51daac ("locking/atomics, asm-generic: Move some macros from to a new file") Signed-off-by: Andy Shevchenko Reported-by: Jan Dąbroś Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6093fa6db260..c9be1657f03d 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -19,6 +19,9 @@ * * Example: * + * #include + * #include + * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) -- cgit v1.2.3 From abc7da58c4b388ab9f6a756c0f297c29d3af665f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Mar 2022 16:06:11 -0700 Subject: init.h: improve __setup and early_param documentation Igor noted in [1] that there are quite a few __setup() handling functions that return incorrect values. Doing this can be harmless, but it can also cause strings to be added to init's argument or environment list, polluting them. Since __setup() handling and return values are not documented, first add documentation for that. Also add more documentation for early_param() handling and return values. For __setup() functions, returning 0 (not handled) has questionable value if it is just a malformed option value, as in rodata=junk since returning 0 would just cause "rodata=junk" to be added to init's environment unnecessarily: Run /sbin/init as init process with arguments: /sbin/init with environment: HOME=/ TERM=linux splash=native rodata=junk Also, there are no recommendations on whether to print a warning when an unknown parameter value is seen. I am not addressing that here. 
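An example handler (illustrative, not part of the patch) following the convention being documented: return 1 when the argument was consumed, 0 to let the unrecognised string fall through to init.

	static bool example_feature_enabled;

	static int __init example_setup(char *str)
	{
		if (str && !strcmp(str, "on")) {
			example_feature_enabled = true;
			return 1;	/* handled */
		}
		return 0;		/* not handled: "example=junk" is passed on to init */
	}
	__setup("example=", example_setup);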
[1] lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Link: https://lkml.kernel.org/r/20220221050852.1147-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Cc: Ingo Molnar Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index d82b4b2e1d25..baf0b29a7010 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -320,12 +320,19 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } +/* + * NOTE: __setup functions return values: + * @fn returns 1 (or non-zero) if the option argument is "handled" + * and returns 0 if the option argument is "not handled". + */ #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) /* - * NOTE: fn is as per module_param, not __setup! - * Emits warning if fn returns non-zero. + * NOTE: @fn is as per module_param, not __setup! + * I.e., @fn returns 0 for no error or non-zero for error + * (possibly @fn returns a -errno value, but it does not matter). + * Emits warning if @fn returns non-zero. */ #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) -- cgit v1.2.3 From f05fa10901aa4b11a84257ecb7ac7f8fe7c4ee1b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Mar 2022 16:06:33 -0700 Subject: kexec: make crashk_res, crashk_low_res and crash_notes symbols always visible Patch series "kexec: use IS_ENABLED(CONFIG_KEXEC_CORE) instead of #ifdef", v2. Replace the conditional compilation using "#ifdef CONFIG_KEXEC_CORE" by a check for "IS_ENABLED(CONFIG_KEXEC_CORE)", to simplify the code and increase compile coverage. I only modified x86, arm, arm64 and riscv, other architectures such as sh, powerpc and s390 are better to be kept kexec code as-is so they are not touched. This patch (of 5): Make the forward declarations of crashk_res, crashk_low_res and crash_notes always visible. Code referring to these symbols can then just check for IS_ENABLED(CONFIG_KEXEC_CORE), instead of requiring conditional compilation using an #ifdef, thus preparing to increase compile coverage and simplify the code. Link: https://lkml.kernel.org/r/20211206160514.2000-1-jszhang@kernel.org Link: https://lkml.kernel.org/r/20211206160514.2000-2-jszhang@kernel.org Signed-off-by: Jisheng Zhang Acked-by: Baoquan He Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Eric W. Biederman Cc: Alexandre Ghiti Cc: Palmer Dabbelt Cc: Russell King (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0c994ae37729..58d1b58a971e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -20,6 +20,12 @@ #include +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; +extern note_buf_t __percpu *crash_notes; + #ifdef CONFIG_KEXEC_CORE #include #include @@ -350,12 +356,6 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -/* Location of a reserved region to hold the crash kernel. 
- */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; -extern note_buf_t __percpu *crash_notes; - /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From 7ef66d186eb95f987a97fb3329b65c840e2dc9bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Mar 2022 06:53:18 -0600 Subject: io_uring: remove IORING_CQE_F_MSG This was introduced with the message ring opcode, but isn't strictly required for the request itself. The sender can encode what is needed in user_data, which is passed to the receiver. It's unclear if having a separate flag that essentially says "This CQE did not originate from an SQE on this ring" provides any real utility to applications. While we can always re-introduce a flag to provide this information, we cannot take it away at a later point in time. Remove the flag while we still can, before it's in a released kernel. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d2be4eb22008..784adc6f6ed2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -201,11 +201,9 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries - * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) -#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From d645552e9bd96671079b27015294ec7f9748fa2b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 24 Mar 2022 15:02:40 +0100 Subject: netfilter: egress: Report interface as outgoing Otherwise packets in egress chains seem like they are being received by the interface, not sent out via it. Fixes: 42df6e1d221dd ("netfilter: Introduce egress hook") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/linux/netfilter_netdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index e6487a691136..8676316547cc 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -99,7 +99,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, return skb; nf_hook_state_init(&state, NF_NETDEV_EGRESS, - NFPROTO_NETDEV, dev, NULL, NULL, + NFPROTO_NETDEV, NULL, dev, NULL, dev_net(dev), NULL); /* nf assumes rcu_read_lock, not just read_lock_bh */ -- cgit v1.2.3 From bb43b14b576228c580bdc7e1aeeded54d540b5ef Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 24 Mar 2022 18:09:49 -0700 Subject: mm: delete __ClearPageWaiters() The PG_waiters bit is not included in PAGE_FLAGS_CHECK_AT_FREE, and vmscan.c's free_unref_page_list() callers rely on that not to generate bad_page() alerts. So __page_cache_release(), put_pages_list() and release_pages() (and presumably copy-and-pasted free_zone_device_page()) are redundant and misleading to make a special point of clearing it (as the "__" implies, it could only safely be used on the freeing path). Delete __ClearPageWaiters(). Remark on this in one of the "possible" comments in folio_wake_bit(), and delete the superfluous comments. 
Link: https://lkml.kernel.org/r/3eafa969-5b1a-accf-88fe-318784c791a@google.com Signed-off-by: Hugh Dickins Tested-by: Yu Zhao Reviewed-by: Yang Shi Reviewed-by: David Hildenbrand Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Yu Zhao Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 88fe1d759cdd..9d8eeaa67d05 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -481,7 +481,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) -- cgit v1.2.3 From 283fd6fe0528ae2fe0222a35ecfdfcbbaeee8159 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Mar 2022 18:09:58 -0700 Subject: mm/migration: add trace events for THP migrations Patch series "mm/migration: Add trace events", v3. This adds trace events for all migration scenarios including base page, THP and HugeTLB. This patch (of 3): This adds two trace events for PMD based THP migration without split. These events closely follow the implementation details like setting and removing of PMD migration entries, which are essential operations for THP migration. This moves CREATE_TRACE_POINTS into generic THP from powerpc for these new trace events to be available on other platforms as well. 
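A sketch of how one of these events is typically emitted; the emit sites live in the mm/ half of the series rather than in this include/ diff, so the function below is illustrative only.

	#include <trace/events/thp.h>

	static void example_set_pmd_migration_entry(unsigned long address,
						    pmd_t pmdswp)
	{
		/* the PMD migration entry has just been installed */
		trace_set_migration_pmd(address, pmd_val(pmdswp));
	}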
Link: https://lkml.kernel.org/r/1643368182-9588-1-git-send-email-anshuman.khandual@arm.com Link: https://lkml.kernel.org/r/1643368182-9588-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Steven Rostedt Cc: Ingo Molnar Cc: Zi Yan Cc: Naoya Horiguchi Cc: John Hubbard Cc: Matthew Wilcox Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/thp.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h index ca3f2767828a..202b3e3e67ff 100644 --- a/include/trace/events/thp.h +++ b/include/trace/events/thp.h @@ -48,6 +48,33 @@ TRACE_EVENT(hugepage_update, TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set) ); +DECLARE_EVENT_CLASS(migration_pmd, + + TP_PROTO(unsigned long addr, unsigned long pmd), + + TP_ARGS(addr, pmd), + + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, pmd) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->pmd = pmd; + ), + TP_printk("addr=%lx, pmd=%lx", __entry->addr, __entry->pmd) +); + +DEFINE_EVENT(migration_pmd, set_migration_pmd, + TP_PROTO(unsigned long addr, unsigned long pmd), + TP_ARGS(addr, pmd) +); + +DEFINE_EVENT(migration_pmd, remove_migration_pmd, + TP_PROTO(unsigned long addr, unsigned long pmd), + TP_ARGS(addr, pmd) +); #endif /* _TRACE_THP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 4cc79b3303f224a920f3aff21f3d231749d73384 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Mar 2022 18:10:01 -0700 Subject: mm/migration: add trace events for base page and HugeTLB migrations This adds two trace events for base page and HugeTLB page migrations. These events, closely follow the implementation details like setting and removing of PTE migration entries, which are essential operations for migration. The new CREATE_TRACE_POINTS in covers both and based trace events. Hence drop redundant CREATE_TRACE_POINTS from other places which could have otherwise conflicted during build. 
Link: https://lkml.kernel.org/r/1643368182-9588-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reported-by: kernel test robot Cc: Steven Rostedt Cc: Ingo Molnar Cc: Zi Yan Cc: Naoya Horiguchi Cc: John Hubbard Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/migrate.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 779f3fad9ecd..061b5128f335 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -105,6 +105,37 @@ TRACE_EVENT(mm_migrate_pages_start, __print_symbolic(__entry->reason, MIGRATE_REASON)) ); +DECLARE_EVENT_CLASS(migration_pte, + + TP_PROTO(unsigned long addr, unsigned long pte, int order), + + TP_ARGS(addr, pte, order), + + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, pte) + __field(int, order) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->pte = pte; + __entry->order = order; + ), + + TP_printk("addr=%lx, pte=%lx order=%d", __entry->addr, __entry->pte, __entry->order) +); + +DEFINE_EVENT(migration_pte, set_migration_pte, + TP_PROTO(unsigned long addr, unsigned long pte, int order), + TP_ARGS(addr, pte, order) +); + +DEFINE_EVENT(migration_pte, remove_migration_pte, + TP_PROTO(unsigned long addr, unsigned long pte, int order), + TP_ARGS(addr, pte, order) +); + #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 7c13c163e036c646b77753deacfe2f5478b654bc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:10 -0700 Subject: kasan, page_alloc: merge kasan_free_pages into free_pages_prepare Currently, the code responsible for initializing and poisoning memory in free_pages_prepare() is scattered across two locations: kasan_free_pages() for HW_TAGS KASAN and free_pages_prepare() itself. This is confusing. This and a few following patches combine the code from these two locations. Along the way, these patches also simplify the performed checks to make them easier to follow. Replaces the only caller of kasan_free_pages() with its implementation. As kasan_has_integrated_init() is only true when CONFIG_KASAN_HW_TAGS is enabled, moving the code does no functional changes. This patch is not useful by itself but makes the simplifications in the following patches easier to follow. 
Link: https://lkml.kernel.org/r/303498d15840bb71905852955c6e2390ecc87139.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b6a93261c92a..dd2161af84e9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -85,7 +85,6 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); -void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ @@ -96,13 +95,6 @@ static __always_inline void kasan_alloc_pages(struct page *page, BUILD_BUG(); } -static __always_inline void kasan_free_pages(struct page *page, - unsigned int order) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) -- cgit v1.2.3 From c82ce3195fd17e57a370e4dc8f36c195b8807b95 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:22 -0700 Subject: mm: clarify __GFP_ZEROTAGS comment __GFP_ZEROTAGS is intended as an optimization: if memory is zeroed during allocation, it's possible to set memory tags at the same time with little performance impact. Clarify this intention of __GFP_ZEROTAGS in the comment. Link: https://lkml.kernel.org/r/cdffde013973c5634a447513e10ec0d21e8eee29.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 20f6fbe12993..72dc4ca75cf3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -232,8 +232,10 @@ struct vm_area_struct; * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if - * __GFP_ZERO is set. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc). This flag is + * intended for optimization: setting memory tags at the same time as zeroing + * memory has minimal additional performace impact. * * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned * on deallocation. Typically used for userspace pages. Currently only has an -- cgit v1.2.3 From b42090ae6f3aa07b0a39403545d688489548a6a8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:31 -0700 Subject: kasan, page_alloc: merge kasan_alloc_pages into post_alloc_hook Currently, the code responsible for initializing and poisoning memory in post_alloc_hook() is scattered across two locations: kasan_alloc_pages() hook for HW_TAGS KASAN and post_alloc_hook() itself. This is confusing. This and a few following patches combine the code from these two locations. 
Along the way, these patches do a step-by-step restructure the many performed checks to make them easier to follow. Replace the only caller of kasan_alloc_pages() with its implementation. As kasan_has_integrated_init() is only true when CONFIG_KASAN_HW_TAGS is enabled, moving the code does no functional changes. Also move init and init_tags variables definitions out of kasan_has_integrated_init() clause in post_alloc_hook(), as they have the same values regardless of what the if condition evaluates to. This patch is not useful by itself but makes the simplifications in the following patches easier to follow. Link: https://lkml.kernel.org/r/5ac7e0b30f5cbb177ec363ddd7878a3141289592.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd2161af84e9..4ed94616c8f0 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -84,17 +84,8 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); - #else /* CONFIG_KASAN_HW_TAGS */ -static __always_inline void kasan_alloc_pages(struct page *page, - unsigned int order, gfp_t flags) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) -- cgit v1.2.3 From 63840de296472f3914bb933b11ba2b764590755e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:52 -0700 Subject: kasan, x86, arm64, s390: rename functions for modules shadow Rename kasan_free_shadow to kasan_free_module_shadow and kasan_module_alloc to kasan_alloc_module_shadow. These functions are used to allocate/free shadow memory for kernel modules when KASAN_VMALLOC is not enabled. The new names better reflect their purpose. Also reword the comment next to their declaration to improve clarity. Link: https://lkml.kernel.org/r/36db32bde765d5d0b856f77d2d806e838513fe84.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Catalin Marinas Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4ed94616c8f0..b450c7653137 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -433,17 +433,17 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, !defined(CONFIG_KASAN_VMALLOC) /* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. + * These functions allocate and free shadow memory for kernel modules. + * They are only required when KASAN_VMALLOC is not supported, as otherwise + * shadow memory is allocated by the generic vmalloc handlers. 
*/ -int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); -void kasan_free_shadow(const struct vm_struct *vm); +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask); +void kasan_free_module_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; } +static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -- cgit v1.2.3 From 0b7ccc70ee1d5b499d1626c9d28f729507b1c036 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:55 -0700 Subject: kasan, vmalloc: drop outdated VM_KASAN comment The comment about VM_KASAN in include/linux/vmalloc.c is outdated. VM_KASAN is currently only used to mark vm_areas allocated for kernel modules when CONFIG_KASAN_VMALLOC is disabled. Drop the comment. Link: https://lkml.kernel.org/r/780395afea83a147b3b5acc36cf2e38f7f8479f9.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 5a0c3b556848..2ca95c7db463 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,17 +35,6 @@ struct notifier_block; /* in notifier.h */ #define VM_DEFER_KMEMLEAK 0 #endif -/* - * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. - * - * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after - * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poison shadow on free if it was never allocated. - * - * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to - * determine which allocations need the module shadow freed. - */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3 From 5bd9bae22a455321327982d0b595a7e76d425fd0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:58 -0700 Subject: kasan: reorder vmalloc hooks Group functions that [de]populate shadow memory for vmalloc. Group functions that [un]poison memory for vmalloc. This patch does no functional changes but prepares KASAN code for adding vmalloc support to HW_TAGS KASAN. 
Link: https://lkml.kernel.org/r/aeef49eb249c206c4c9acce2437728068da74c28.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b450c7653137..5f9ed1372eef 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -397,34 +397,32 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); +void kasan_unpoison_vmalloc(const void *start, unsigned long size); +void kasan_poison_vmalloc(const void *start, unsigned long size); #else /* CONFIG_KASAN_VMALLOC */ +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } - -static inline void kasan_poison_vmalloc(const void *start, unsigned long size) -{ } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) {} + unsigned long free_region_end) { } -static inline void kasan_populate_early_vm_area_shadow(void *start, - unsigned long size) +static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ } +static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } #endif /* CONFIG_KASAN_VMALLOC */ -- cgit v1.2.3 From 579fb0ac085be2015529a5f7bd1061899a6374de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:01 -0700 Subject: kasan: add wrappers for vmalloc hooks Add wrappers around functions that [un]poison memory for vmalloc allocations. These functions will be used by HW_TAGS KASAN and therefore need to be disabled when kasan=off command line argument is provided. This patch does no functional changes for software KASAN modes. 
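The wrapper shape being introduced, in generic form (names below are placeholders): the kasan_enabled() check is a static branch under CONFIG_KASAN_HW_TAGS, so booting with kasan=off skips the out-of-line call entirely.

	void __example_kasan_hook(const void *start, unsigned long size);

	static __always_inline void example_kasan_hook(const void *start,
						       unsigned long size)
	{
		if (kasan_enabled())	/* patched out when kasan=off */
			__example_kasan_hook(start, size);
	}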
Link: https://lkml.kernel.org/r/3b8728eac438c55389fb0f9a8a2145d71dd77487.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5f9ed1372eef..a6d6fdefb278 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -403,8 +403,21 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); +void __kasan_unpoison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_unpoison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_unpoison_vmalloc(start, size); +} + +void __kasan_poison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_poison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_poison_vmalloc(start, size); +} #else /* CONFIG_KASAN_VMALLOC */ -- cgit v1.2.3 From 1d96320f8d5320423737da61e6e6937f6b475b5c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:13 -0700 Subject: kasan, vmalloc: add vmalloc tagging for SW_TAGS Add vmalloc tagging support to SW_TAGS KASAN. - __kasan_unpoison_vmalloc() now assigns a random pointer tag, poisons the virtual mapping accordingly, and embeds the tag into the returned pointer. - __get_vm_area_node() (used by vmalloc() and vmap()) and pcpu_get_vm_areas() save the tagged pointer into vm_struct->addr (note: not into vmap_area->addr). This requires putting kasan_unpoison_vmalloc() after setup_vmalloc_vm[_locked](); otherwise the latter will overwrite the tagged pointer. The tagged pointer then is naturally propagateed to vmalloc() and vmap(). - vm_map_ram() returns the tagged pointer directly. As a result of this change, vm_struct->addr is now tagged. Enabling KASAN_VMALLOC with SW_TAGS is not yet allowed. 
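A rough sketch of the call-site ordering described above; the actual change is in mm/vmalloc.c, so the function below is illustrative only.

	static void *example_finish_vm_area(struct vm_struct *area,
					    struct vmap_area *va,
					    unsigned long flags,
					    const void *caller,
					    unsigned long requested_size)
	{
		setup_vmalloc_vm(area, va, flags, caller);	/* fills area->addr */

		/* must run after setup_vmalloc_vm(), or the tag is overwritten */
		area->addr = kasan_unpoison_vmalloc(area->addr, requested_size);

		return area->addr;	/* tagged pointer propagates to vmalloc()/vmap() */
	}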
Link: https://lkml.kernel.org/r/4a78f3c064ce905e9070c29733aca1dd254a74f1.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index a6d6fdefb278..d3efb6cbd0f5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -403,12 +403,13 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void __kasan_unpoison_vmalloc(const void *start, unsigned long size); -static __always_inline void kasan_unpoison_vmalloc(const void *start, - unsigned long size) +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size); +static __always_inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size) { if (kasan_enabled()) - __kasan_unpoison_vmalloc(start, size); + return __kasan_unpoison_vmalloc(start, size); + return (void *)start; } void __kasan_poison_vmalloc(const void *start, unsigned long size); @@ -433,8 +434,11 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_start, unsigned long free_region_end) { } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } +static inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size) +{ + return (void *)start; +} static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } -- cgit v1.2.3 From 01d92c7f358ce892279ca830cf6ccf2862a17d1c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:16 -0700 Subject: kasan, vmalloc, arm64: mark vmalloc mappings as pgprot_tagged HW_TAGS KASAN relies on ARM Memory Tagging Extension (MTE). With MTE, a memory region must be mapped as MT_NORMAL_TAGGED to allow setting memory tags via MTE-specific instructions. Add proper protection bits to vmalloc() allocations. These allocations are always backed by page_alloc pages, so the tags will actually be getting set on the corresponding physical memory. 
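The arch side of this series is outside the include/ diff shown here; the arm64 override presumably looks along these lines (an assumption, shown only to illustrate how the new hook is meant to be used):

	/* arch/arm64: map vmalloc() memory as MT_NORMAL_TAGGED for MTE */
	#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
	static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
	{
		return pgprot_tagged(prot);
	}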
Link: https://lkml.kernel.org/r/983fc33542db2f6b1e77b34ca23448d4640bbb9e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2ca95c7db463..3b1df7da402d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size) } #endif +#ifndef arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return prot; +} +#endif + /* * Highlevel APIs for driver use */ -- cgit v1.2.3 From f49d9c5bb15cc5c470097078ec1f26ff605ae1a2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:23 -0700 Subject: kasan, mm: only define ___GFP_SKIP_KASAN_POISON with HW_TAGS Only define the ___GFP_SKIP_KASAN_POISON flag when CONFIG_KASAN_HW_TAGS is enabled. This patch it not useful by itself, but it prepares the code for additions of new KASAN-specific GFP patches. Link: https://lkml.kernel.org/r/44e5738a584c11801b2b8f1231898918efc8634a.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 +++++++- include/trace/events/mmflags.h | 12 +++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 72dc4ca75cf3..75abf8d5bc9b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -54,7 +54,11 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u +#ifdef CONFIG_KASAN_HW_TAGS #define ___GFP_SKIP_KASAN_POISON 0x1000000u +#else +#define ___GFP_SKIP_KASAN_POISON 0 +#endif #ifdef CONFIG_LOCKDEP #define ___GFP_NOLOCKDEP 0x2000000u #else @@ -251,7 +255,9 @@ struct vm_area_struct; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (24 + \ + IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 116ed4d5d0f8..cb4520374e2c 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -49,12 +49,18 @@ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ - {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \ - {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\ + {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \ + +#ifdef CONFIG_KASAN_HW_TAGS +#define __def_gfpflag_names_kasan \ + , {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"} +#else +#define __def_gfpflag_names_kasan +#endif 
#define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ - __def_gfpflag_names \ + __def_gfpflag_names __def_gfpflag_names_kasan \ ) : "none" #ifdef CONFIG_MMU -- cgit v1.2.3 From 53ae233c30a623ff44ff2f83854e92530c5d9fc2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:26 -0700 Subject: kasan, page_alloc: allow skipping unpoisoning for HW_TAGS Add a new GFP flag __GFP_SKIP_KASAN_UNPOISON that allows skipping KASAN poisoning for page_alloc allocations. The flag is only effective with HW_TAGS KASAN. This flag will be used by vmalloc code for page_alloc allocations backing vmalloc() mappings in a following patch. The reason to skip KASAN poisoning for these pages in page_alloc is because vmalloc code will be poisoning them instead. Also reword the comment for __GFP_SKIP_KASAN_POISON. Link: https://lkml.kernel.org/r/35c97d77a704f6ff971dd3bfe4be95855744108e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 21 +++++++++++++-------- include/trace/events/mmflags.h | 5 +++-- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 75abf8d5bc9b..688a39fde43e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -55,12 +55,14 @@ struct vm_area_struct; #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_KASAN_POISON 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x1000000u +#define ___GFP_SKIP_KASAN_POISON 0x2000000u #else +#define ___GFP_SKIP_KASAN_UNPOISON 0 #define ___GFP_SKIP_KASAN_POISON 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x2000000u +#define ___GFP_NOLOCKDEP 0x4000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -241,22 +243,25 @@ struct vm_area_struct; * intended for optimization: setting memory tags at the same time as zeroing * memory has minimal additional performace impact. * - * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned - * on deallocation. Typically used for userspace pages. Currently only has an - * effect in HW tags mode. + * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. + * Only effective in HW_TAGS mode. + * + * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. + * Typically, used for userspace pages. Only effective in HW_TAGS mode. 
*/ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) +#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (24 + \ - IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ +#define __GFP_BITS_SHIFT (24 + \ + 2 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index cb4520374e2c..134c45e62d91 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -52,8 +52,9 @@ {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \ #ifdef CONFIG_KASAN_HW_TAGS -#define __def_gfpflag_names_kasan \ - , {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"} +#define __def_gfpflag_names_kasan , \ + {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \ + {(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"} #else #define __def_gfpflag_names_kasan #endif -- cgit v1.2.3 From 9353ffa6e9e90d2b6348209cf2b95a8ffee18711 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:29 -0700 Subject: kasan, page_alloc: allow skipping memory init for HW_TAGS Add a new GFP flag __GFP_SKIP_ZERO that allows to skip memory initialization. The flag is only effective with HW_TAGS KASAN. This flag will be used by vmalloc code for page_alloc allocations backing vmalloc() mappings in a following patch. The reason to skip memory initialization for these pages in page_alloc is because vmalloc code will be initializing them instead. With the current implementation, when __GFP_SKIP_ZERO is provided, __GFP_ZEROTAGS is ignored. This doesn't matter, as these two flags are never provided at the same time. However, if this is changed in the future, this particular implementation detail can be changed as well. 
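A sketch of the intended consumer, based on the description above: when HW_TAGS KASAN is going to initialize and tag a vmalloc() mapping itself, the backing page allocation can skip both steps. The vmalloc-side change lands later in the series, so the helper below is an assumption rather than quoted code.

	static gfp_t example_vmalloc_gfp_mask(gfp_t gfp_mask)
	{
		if (kasan_hw_tags_enabled())
			gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
		return gfp_mask;
	}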
Link: https://lkml.kernel.org/r/0d53efeff345de7d708e0baa0d8829167772521e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 18 +++++++++++------- include/trace/events/mmflags.h | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 688a39fde43e..0fa17fb85de5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -55,14 +55,16 @@ struct vm_area_struct; #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_KASAN_UNPOISON 0x1000000u -#define ___GFP_SKIP_KASAN_POISON 0x2000000u +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u +#define ___GFP_SKIP_KASAN_POISON 0x4000000u #else +#define ___GFP_SKIP_ZERO 0 #define ___GFP_SKIP_KASAN_UNPOISON 0 #define ___GFP_SKIP_KASAN_POISON 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP 0x8000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -239,9 +241,10 @@ struct vm_area_struct; * %__GFP_ZERO returns a zeroed page on success. * * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc). This flag is - * intended for optimization: setting memory tags at the same time as zeroing - * memory has minimal additional performace impact. + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performace impact. * * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. * Only effective in HW_TAGS mode. @@ -253,6 +256,7 @@ struct vm_area_struct; #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) #define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) #define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) @@ -261,7 +265,7 @@ struct vm_area_struct; /* Room for N __GFP_FOO bits */ #define __GFP_BITS_SHIFT (24 + \ - 2 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ + 3 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 134c45e62d91..6532119a6bf1 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -53,6 +53,7 @@ #ifdef CONFIG_KASAN_HW_TAGS #define __def_gfpflag_names_kasan , \ + {(unsigned long)__GFP_SKIP_ZERO, "__GFP_SKIP_ZERO"}, \ {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \ {(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"} #else -- cgit v1.2.3 From 23689e91fb22c15b84ac6c22ad9942039792f3af Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:32 -0700 Subject: kasan, vmalloc: add vmalloc tagging for HW_TAGS Add vmalloc tagging support to HW_TAGS KASAN. 
The key difference between HW_TAGS and the other two KASAN modes when it comes to vmalloc: HW_TAGS KASAN can only assign tags to physical memory. The other two modes have shadow memory covering every mapped virtual memory region. Make __kasan_unpoison_vmalloc() for HW_TAGS KASAN: - Skip non-VM_ALLOC mappings as HW_TAGS KASAN can only tag a single mapping of normal physical memory; see the comment in the function. - Generate a random tag, tag the returned pointer and the allocation, and initialize the allocation at the same time. - Propagate the tag into the page stucts to allow accesses through page_address(vmalloc_to_page()). The rest of vmalloc-related KASAN hooks are not needed: - The shadow-related ones are fully skipped. - __kasan_poison_vmalloc() is kept as a no-op with a comment. Poisoning and zeroing of physical pages that are backing vmalloc() allocations are skipped via __GFP_SKIP_KASAN_UNPOISON and __GFP_SKIP_ZERO: __kasan_unpoison_vmalloc() does that instead. Enabling CONFIG_KASAN_VMALLOC with HW_TAGS is not yet allowed. Link: https://lkml.kernel.org/r/d19b2e9e59a9abc59d05b72dea8429dcaea739c6.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d3efb6cbd0f5..65fc5285bb59 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -26,6 +26,12 @@ struct kunit_kasan_expectation { #endif +typedef unsigned int __bitwise kasan_vmalloc_flags_t; + +#define KASAN_VMALLOC_NONE 0x00u +#define KASAN_VMALLOC_INIT 0x01u +#define KASAN_VMALLOC_VM_ALLOC 0x02u + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include @@ -397,18 +403,39 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void *__kasan_unpoison_vmalloc(const void *start, unsigned long size); +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ } +static inline int kasan_populate_vmalloc(unsigned long start, + unsigned long size) +{ + return 0; +} +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) { } + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags); static __always_inline void *kasan_unpoison_vmalloc(const void *start, - unsigned long size) + unsigned long size, + kasan_vmalloc_flags_t flags) { if (kasan_enabled()) - return __kasan_unpoison_vmalloc(start, size); + return __kasan_unpoison_vmalloc(start, size, flags); return (void *)start; } @@ -435,7 +462,8 @@ static inline void 
kasan_release_vmalloc(unsigned long start, unsigned long free_region_end) { } static inline void *kasan_unpoison_vmalloc(const void *start, - unsigned long size) + unsigned long size, + kasan_vmalloc_flags_t flags) { return (void *)start; } -- cgit v1.2.3 From f6e39794f4b6da7ca9b77f2f9ad11fd6f0ac83e5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:35 -0700 Subject: kasan, vmalloc: only tag normal vmalloc allocations The kernel can use to allocate executable memory. The only supported way to do that is via __vmalloc_node_range() with the executable bit set in the prot argument. (vmap() resets the bit via pgprot_nx()). Once tag-based KASAN modes start tagging vmalloc allocations, executing code from such allocations will lead to the PC register getting a tag, which is not tolerated by the kernel. Only tag the allocations for normal kernel pages. [andreyknvl@google.com: pass KASAN_VMALLOC_PROT_NORMAL to kasan_unpoison_vmalloc()] Link: https://lkml.kernel.org/r/9230ca3d3e40ffca041c133a524191fd71969a8d.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: support tagged vmalloc mappings] Link: https://lkml.kernel.org/r/2f6605e3a358cf64d73a05710cb3da356886ad29.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: don't unintentionally disabled poisoning] Link: https://lkml.kernel.org/r/de4587d6a719232e83c760113e46ed2d4d8da61e.1646757322.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/fbfd9939a4dc375923c9a5c6b9e7ab05c26b8c6b.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 65fc5285bb59..903b9453931d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -28,9 +28,10 @@ struct kunit_kasan_expectation { typedef unsigned int __bitwise kasan_vmalloc_flags_t; -#define KASAN_VMALLOC_NONE 0x00u -#define KASAN_VMALLOC_INIT 0x01u -#define KASAN_VMALLOC_VM_ALLOC 0x02u +#define KASAN_VMALLOC_NONE 0x00u +#define KASAN_VMALLOC_INIT 0x01u +#define KASAN_VMALLOC_VM_ALLOC 0x02u +#define KASAN_VMALLOC_PROT_NORMAL 0x04u #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -- cgit v1.2.3 From ed6d74446cbfb88c747d4b32477a9d46132694db Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:12:02 -0700 Subject: kasan: test: support async (again) and asymm modes for HW_TAGS Async mode support has already been implemented in commit e80a76aa1a91 ("kasan, arm64: tests supports for HW_TAGS async mode") but then got accidentally broken in commit 99734b535d9b ("kasan: detect false-positives in tests"). Restore the changes removed by the latter patch and adapt them for asymm mode: add a sync_fault flag to kunit_kasan_expectation that only get set if the MTE fault was synchronous, and reenable MTE on such faults in tests. Also rename kunit_kasan_expectation to kunit_kasan_status and move its definition to mm/kasan/kasan.h from include/linux/kasan.h, as this structure is only internally used by KASAN. Also put the structure definition under IS_ENABLED(CONFIG_KUNIT). 
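Only the removal from include/linux/kasan.h appears in the diff below; as a rough sketch (reconstructed from the description above, not copied from the patch), the relocated and renamed structure in mm/kasan/kasan.h would look something like:

    #if IS_ENABLED(CONFIG_KUNIT)
    /* Used by KUnit-based KASAN tests to talk to the fault handlers. */
    struct kunit_kasan_status {
            bool report_found;      /* a KASAN report was produced */
            bool sync_fault;        /* the MTE fault was synchronous */
    };
    #endif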
Link: https://lkml.kernel.org/r/133970562ccacc93ba19d754012c562351d4a8c8.1645033139.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Vincenzo Frascino Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 903b9453931d..fe36215807f7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -19,11 +19,6 @@ struct task_struct; #include #include -/* kasan_data struct is used in KUnit tests for KASAN expected failures */ -struct kunit_kasan_expectation { - bool report_found; -}; - #endif typedef unsigned int __bitwise kasan_vmalloc_flags_t; -- cgit v1.2.3 From 80207910cd71b4e0e87140d165d82b5d3ff69e53 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:13:12 -0700 Subject: kasan: move and hide kasan_save_enable/restore_multi_shot - Move kasan_save_enable/restore_multi_shot() declarations to mm/kasan/kasan.h, as there is no need for them to be visible outside of KASAN implementation. - Only define and export these functions when KASAN tests are enabled. - Move their definitions closer to other test-related code in report.c. Link: https://lkml.kernel.org/r/6ba637333b78447f027d775f2d55ab1a40f63c99.1646237226.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe36215807f7..ceebcb9de7bf 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -267,10 +267,6 @@ static __always_inline bool kasan_check_byte(const void *addr) return true; } - -bool kasan_save_enable_multi_shot(void); -void kasan_restore_multi_shot(bool enabled); - #else /* CONFIG_KASAN */ static inline slab_flags_t kasan_never_merge(void) -- cgit v1.2.3 From 562beb7235abfebdd8366e0664a5c3d1e597b990 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 24 Mar 2022 18:13:27 -0700 Subject: mm/huge_memory: make is_transparent_hugepage() static It's only used inside the huge_memory.c now. Don't export it and make it static. We can thus reduce the size of huge_memory.o a bit. 
Without this patch: text data bss dec hex filename 32319 2965 4 35288 89d8 mm/huge_memory.o With this patch: text data bss dec hex filename 32042 2957 4 35003 88bb mm/huge_memory.o Link: https://lkml.kernel.org/r/20220302082145.12028-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Muchun Song Reviewed-by: Yang Shi Cc: Matthew Wilcox Cc: William Kucharski Cc: Hugh Dickins Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0734aff8fa19..2999190adc22 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -183,7 +183,6 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, void prep_transhuge_page(struct page *page); void free_transhuge_page(struct page *page); -bool is_transparent_hugepage(struct page *page); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); @@ -341,11 +340,6 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, static inline void prep_transhuge_page(struct page *page) {} -static inline bool is_transparent_hugepage(struct page *page) -{ - return false; -} - #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL -- cgit v1.2.3 From 363106c4cefe7d08ca67cc77f3d38d4213190f31 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Mar 2022 18:13:46 -0700 Subject: mm/khugepaged: remove reuse_swap_page() usage reuse_swap_page() currently indicates if we can write to an anon page without COW. A COW is required if the page is shared by multiple processes (either already mapped or via swap entries) or if there is concurrent writeback that cannot tolerate concurrent page modifications. However, in the context of khugepaged we're not actually going to write to a read-only mapped page, we'll copy the page content to our newly allocated THP and map that THP writable. All we have to make sure is that the read-only mapped page we're about to copy won't get reused by another process sharing the page, otherwise, page content would get modified. But that is already guaranteed via multiple mechanisms (e.g., holding a reference, holding the page lock, removing the rmap after copying the page). The swapcache handling was introduced in commit 10359213d05a ("mm: incorporate read-only pages into transparent huge pages") and it sounds like it merely wanted to mimic what do_swap_page() would do when trying to map a page obtained via the swapcache writable. As that logic is unnecessary, let's just remove it, removing the last user of reuse_swap_page(). Link: https://lkml.kernel.org/r/20220131162940.210846-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Yang Shi Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. 
Shutemov Cc: Liang Zhang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oleg Nesterov Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/huge_memory.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 4fdb14a81108..d651f3437367 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -29,7 +29,6 @@ EM( SCAN_VMA_NULL, "vma_null") \ EM( SCAN_VMA_CHECK, "vma_check_failed") \ EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \ - EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \ EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \ -- cgit v1.2.3 From 03104c2c5db8918030788e607e4af980b2f42bb3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Mar 2022 18:13:50 -0700 Subject: mm/swapfile: remove stale reuse_swap_page() All users are gone, let's remove it. We'll let SWP_STABLE_WRITES stick around for now, as it might come in handy in the near future. Link: https://lkml.kernel.org/r/20220131162940.210846-8-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Liang Zhang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oleg Nesterov Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index f37837c614c5..27093b477c5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -515,7 +515,6 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern bool reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); @@ -681,9 +680,6 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page) \ - (page_trans_huge_mapcount(page) == 1) - static inline int try_to_free_swap(struct page *page) { return 0; -- cgit v1.2.3 From 55c62fa7c53368a9011cd1276c001a1469078c6a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Mar 2022 18:13:53 -0700 Subject: mm/huge_memory: remove stale page_trans_huge_mapcount() All users are gone, let's remove it. Link: https://lkml.kernel.org/r/20220131162940.210846-9-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. 
Shutemov Cc: Liang Zhang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oleg Nesterov Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a3dd7e617e4..e34edb775334 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -834,16 +834,11 @@ static inline int total_mapcount(struct page *page) return folio_mapcount(page_folio(page)); } -int page_trans_huge_mapcount(struct page *page); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } -static inline int page_trans_huge_mapcount(struct page *page) -{ - return page_mapcount(page); -} #endif static inline struct page *virt_to_head_page(const void *x) -- cgit v1.2.3 From 566d3362885aab04d6b0f885f12db3176ca3a032 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 24 Mar 2022 18:13:59 -0700 Subject: mm: warn on deleting redirtied only if accounted filemap_unaccount_folio() has a WARN_ON_ONCE(folio_test_dirty(folio)). It is good to warn of late dirtying on a persistent filesystem, but late dirtying on tmpfs can only lose data which is expected to be thrown away; and it's a pity if that warning comes ONCE on tmpfs, then hides others which really matter. Make it conditional on mapping_cap_writeback(). Cleanup: then folio_account_cleaned() no longer needs to check that for itself, and so no longer needs to know the mapping. Link: https://lkml.kernel.org/r/b5a1106c-7226-a5c6-ad41-ad4832cae1f@google.com Signed-off-by: Hugh Dickins Cc: Matthew Wilcox Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 58f395f3febe..a8d0b327b066 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1009,8 +1009,7 @@ static inline void __set_page_dirty(struct page *page, { __folio_mark_dirty(page_folio(page), mapping, warn); } -void folio_account_cleaned(struct folio *folio, struct address_space *mapping, - struct bdi_writeback *wb); +void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { -- cgit v1.2.3 From 9457056ac426e5ed0671356509c8dcce69f8dee0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 24 Mar 2022 18:14:12 -0700 Subject: mm: madvise: MADV_DONTNEED_LOCKED MADV_DONTNEED historically rejects mlocked ranges, but with MLOCK_ONFAULT and MCL_ONFAULT allowing to mlock without populating, there are valid use cases for depopulating locked ranges as well. Users mlock memory to protect secrets. There are allocators for secure buffers that want in-use memory generally mlocked, but cleared and invalidated memory to give up the physical pages. This could be done with explicit munlock -> mlock calls on free -> alloc of course, but that adds two unnecessary syscalls, heavy mmap_sem write locks, vma splits and re-merges - only to get rid of the backing pages. Users also mlockall(MCL_ONFAULT) to suppress sustained paging, but are okay with on-demand initial population. 
It seems valid to selectively free some memory during the lifetime of such a process, without having to mess with its overall policy. Why add a separate flag? Isn't this a pretty niche usecase? - MADV_DONTNEED has been bailing on locked vmas forever. It's at least conceivable that someone, somewhere is relying on mlock to protect data from perhaps broader invalidation calls. Changing this behavior now could lead to quiet data corruption. - It also clarifies expectations around MADV_FREE and maybe MADV_REMOVE. It avoids the situation where one quietly behaves different than the others. MADV_FREE_LOCKED can be added later. - The combination of mlock() and madvise() in the first place is probably niche. But where it happens, I'd say that dropping pages from a locked region once they don't contain secrets or won't page anymore is much saner than relying on mlock to protect memory from speculative or errant invalidation calls. It's just that we can't change the default behavior because of the two previous points. Given that, an explicit new flag seems to make the most sense. [hannes@cmpxchg.org: fix mips build] Link: https://lkml.kernel.org/r/20220304171912.305060-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Reviewed-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Nadav Amit Cc: David Hildenbrand Cc: Dr. David Alan Gilbert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 1567a3294c3d..6c1aa92a92e4 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -75,6 +75,8 @@ #define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ #define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ +#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3 From 421ab1be43bd015ffe744f4ea25df4f19d1ce6fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Mar 2022 10:37:31 -0400 Subject: SUNRPC: Do not dereference non-socket transports in sysfs Do not cast the struct xprt to a sock_xprt unless we know it is a UDP or TCP transport. Otherwise the call to lock the mutex will scribble over whatever structure is actually there. This has been seen to cause hard system lockups when the underlying transport was RDMA. 
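Illustrative sketch only (simplified, local variable names arbitrary): with the transport-level operations added below, the sysfs read path can avoid the blind cast to struct sock_xprt and instead ask the transport itself, since only socket-based transports are expected to provide these callbacks:

    /* Sketch: RDMA and other non-socket transports leave these ops NULL. */
    unsigned short srcport = 0;

    if (xprt->ops->get_srcport)
            srcport = xprt->ops->get_srcport(xprt);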
Fixes: b49ea673e119 ("SUNRPC: lock against ->sock changing during sysfs read") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ include/linux/sunrpc/xprtsock.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b..eef5e87c03b4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -139,6 +139,9 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, + size_t buflen); + unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); void (*prepare_request)(struct rpc_rqst *req); diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 3eb0079669c5..38284f25eddf 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -10,7 +10,6 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); -unsigned short get_srcport(struct rpc_xprt *); #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) -- cgit v1.2.3 From bddac7c1e02ba47f0570e494c9289acea3062cc1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 Mar 2022 10:42:04 -0700 Subject: Revert "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit aa6f8dcbab473f3a3c7454b74caa46d36cdc5d13. It turns out this breaks at least the ath9k wireless driver, and possibly others. What the ath9k driver does on packet receive is to set up the DMA transfer with: int ath_rx_init(..) .. bf->bf_buf_addr = dma_map_single(sc->dev, skb->data, common->rx_bufsize, DMA_FROM_DEVICE); and then the receive logic (through ath_rx_tasklet()) will fetch incoming packets static bool ath_edma_get_buffers(..) .. dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr, common->rx_bufsize, DMA_FROM_DEVICE); ret = ath9k_hw_process_rxdesc_edma(ah, rs, skb->data); if (ret == -EINPROGRESS) { /*let device gain the buffer again*/ dma_sync_single_for_device(sc->dev, bf->bf_buf_addr, common->rx_bufsize, DMA_FROM_DEVICE); return false; } and it's worth noting how that first DMA sync: dma_sync_single_for_cpu(..DMA_FROM_DEVICE); is there to make sure the CPU can read the DMA buffer (possibly by copying it from the bounce buffer area, or by doing some cache flush). The iommu correctly turns that into a "copy from bounce bufer" so that the driver can look at the state of the packets. In the meantime, the device may continue to write to the DMA buffer, but we at least have a snapshot of the state due to that first DMA sync. But that _second_ DMA sync: dma_sync_single_for_device(..DMA_FROM_DEVICE); is telling the DMA mapping that the CPU wasn't interested in the area because the packet wasn't there. In the case of a DMA bounce buffer, that is a no-op. Note how it's not a sync for the CPU (the "for_device()" part), and it's not a sync for data written by the CPU (the "DMA_FROM_DEVICE" part). Or rather, it _should_ be a no-op. That's what commit aa6f8dcbab47 broke: it made the code bounce the buffer unconditionally, and changed the DMA_FROM_DEVICE to just unconditionally and illogically be DMA_TO_DEVICE. 
[ Side note: purely within the confines of the swiotlb driver it wasn't entirely illogical: The reason it did that odd DMA_FROM_DEVICE -> DMA_TO_DEVICE conversion thing is because inside the swiotlb driver, it uses just a swiotlb_bounce() helper that doesn't care about the whole distinction of who the sync is for - only which direction to bounce. So it took the "sync for device" to mean that the CPU must have been the one writing, and thought it meant DMA_TO_DEVICE. ] Also note how the commentary in that commit was wrong, probably due to that whole confusion, claiming that the commit makes the swiotlb code "bounce unconditionally (that is, also when dir == DMA_TO_DEVICE) in order do avoid synchronising back stale data from the swiotlb buffer" which is nonsensical for two reasons: - that "also when dir == DMA_TO_DEVICE" is nonsensical, as that was exactly when it always did - and should do - the bounce. - since this is a sync for the device (not for the CPU), we're clearly fundamentally not coping back stale data from the bounce buffers at all, because we'd be copying *to* the bounce buffers. So that commit was just very confused. It confused the direction of the synchronization (to the device, not the cpu) with the direction of the DMA (from the device). Reported-and-bisected-by: Oleksandr Natalenko Reported-by: Olha Cherevyk Cc: Halil Pasic Cc: Christoph Hellwig Cc: Kalle Valo Cc: Robin Murphy Cc: Toke Høiland-Jørgensen Cc: Maxime Bizon Cc: Johannes Berg Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355bb1..6150d11a607e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,6 +61,14 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. + */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a -- cgit v1.2.3 From 901c7280ca0d5e2b4a8929fbe0bfb007ac2a6544 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Mar 2022 11:37:05 -0700 Subject: Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Halil Pasic points out [1] that the full revert of that commit (revert in bddac7c1e02b), and that a partial revert that only reverts the problematic case, but still keeps some of the cleanups is probably better.  And that partial revert [2] had already been verified by Oleksandr Natalenko to also fix the issue, I had just missed that in the long discussion. So let's reinstate the cleanups from commit aa6f8dcbab47 ("swiotlb: rework "fix info leak with DMA_FROM_DEVICE""), and effectively only revert the part that caused problems. 
Link: https://lore.kernel.org/all/20220328013731.017ae3e3.pasic@linux.ibm.com/ [1] Link: https://lore.kernel.org/all/20220324055732.GB12078@lst.de/ [2] Link: https://lore.kernel.org/all/4386660.LvFx2qVVIh@natalenko.name/ [3] Suggested-by: Halil Pasic Tested-by: Oleksandr Natalenko Cc: Christoph Hellwig" Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6150d11a607e..dca2b1355bb1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,14 +61,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a -- cgit v1.2.3 From 504c1cabe325df65c18ef38365ddd1a41c6b591b Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 25 Jan 2022 21:22:21 +0800 Subject: mm/balloon_compaction: make balloon page compaction callbacks static Since commit b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature"), these functions are called via balloon_aops callbacks. They're not called directly outside this file. So make them static and clean up the relevant code. Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/20220125132221.2220-1-linmiaohe@huawei.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Muchun Song --- include/linux/balloon_compaction.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 338aa27e4773..edb7f6d41faa 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -80,12 +80,6 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) #ifdef CONFIG_BALLOON_COMPACTION extern const struct address_space_operations balloon_aops; -extern bool balloon_page_isolate(struct page *page, - isolate_mode_t mode); -extern void balloon_page_putback(struct page *page); -extern int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, - struct page *page, enum migrate_mode mode); /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -155,22 +149,6 @@ static inline void balloon_page_delete(struct page *page) list_del(&page->lru); } -static inline bool balloon_page_isolate(struct page *page) -{ - return false; -} - -static inline void balloon_page_putback(struct page *page) -{ - return; -} - -static inline int balloon_page_migrate(struct page *newpage, - struct page *page, enum migrate_mode mode) -{ - return 0; -} - static inline gfp_t balloon_mapping_gfp_mask(void) { return GFP_HIGHUSER; -- cgit v1.2.3 From 90a6951b58e935124eeb7ecd9fbc2426f841ac0c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Tue, 15 Feb 2022 11:04:29 +0530 Subject: Add definition of VIRTIO_F_IN_ORDER feature bit This patch adds the definition of VIRTIO_F_IN_ORDER feature bit in the relevant header file to make it available in QEMU's linux standard header file virtio_config.h, which is updated using scripts/update-linux-headers.sh 
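For illustration only (this patch itself just adds the definition): a kernel driver could gate any in-order-specific handling on the negotiated feature bit using the existing helper, e.g.:

    #include <linux/virtio_config.h>

    /* Sketch: take an in-order fast path only if the device offered and the
     * driver accepted VIRTIO_F_IN_ORDER during feature negotiation. */
    static bool vq_uses_in_order(struct virtio_device *vdev)
    {
            return virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
    }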
Signed-off-by: Gautam Dawar Link: https://lore.kernel.org/r/20220215053430.24650-1-gdawar@xilinx.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b5eda06f0d57..f0fb0ae021c0 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -82,6 +82,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * Inorder feature indicates that all buffers are used by the device + * in the same order in which they have been made available. + */ +#define VIRTIO_F_IN_ORDER 35 + /* * This feature indicates that memory accesses by the driver and the * device are ordered in a way described by the platform. -- cgit v1.2.3 From 13d640a3e9a3ac7ec694843d3d3b785e85fb8cb8 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:14 +0800 Subject: virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC Base on the lastest virtio crypto spec, define VIRTIO_CRYPTO_NOSPC. Reviewed-by: Gonglei Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-2-pizhenwei@bytedance.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index a03932f10565..1166a49084b0 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -408,6 +408,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) -- cgit v1.2.3 From 24e19590628b58578748eeaec8140bf9c9dc00d9 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:15 +0800 Subject: virtio-crypto: introduce akcipher service Introduce asymmetric service definition, asymmetric operations and several well known algorithms. Co-developed-by: lei he Signed-off-by: lei he Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-3-pizhenwei@bytedance.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gonglei --- include/uapi/linux/virtio_crypto.h | 81 +++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 1166a49084b0..71a54a6849ca 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) __le32 opcode; __le32 algo; __le32 flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { __u8 padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + __le32 padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + __le32 hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define VIRTIO_CRYPTO_CURVE_NIST_P521 5 + __le32 curve_id; + __le32 padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + __le32 algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + __le32 keytype; + __le32 keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + __u8 padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; __u8 padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + 
VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) __le32 opcode; /* algo should be service-specific algorithms */ __le32 algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { __u8 padding[32]; }; +struct virtio_crypto_akcipher_para { + __le32 src_data_len; + __le32 dst_data_len; +}; + +struct virtio_crypto_akcipher_data_req { + struct virtio_crypto_akcipher_para para; + __u8 padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; __u8 padding[48]; } u; }; @@ -409,6 +487,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ #define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) @@ -439,7 +518,7 @@ struct virtio_crypto_config { __le32 max_cipher_key_len; /* Maximum length of authenticated key */ __le32 max_auth_key_len; - __le32 reserve; + __le32 akcipher_algo; /* Maximum size of each crypto request's content */ __le64 max_size; }; -- cgit v1.2.3 From a61280ddddaa45f95b60dd54c05f8e0e5b6810b7 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:51 +0800 Subject: vdpa: support exposing the config size to userspace - GET_CONFIG_SIZE: return the size of the virtio config space. The size contains the fields which are conditional on feature bits. Acked-by: Jason Wang Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-2-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 3 ++- include/uapi/linux/vhost.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 721089bb4c84..a5269191edda 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -207,7 +207,8 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) - * @get_config_size: Get the size of the configuration space + * @get_config_size: Get the size of the configuration space includes + * fields that are conditional on feature bits. 
* @vdev: vdpa device * Returns size_t: configuration size * @get_config: Read from device specific configuration space diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c998860d7bbc..bc74e95a273a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -150,4 +150,8 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + #endif -- cgit v1.2.3 From 81d46d693173a5c86a9b0c648eca1817ad5c0ae5 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:52 +0800 Subject: vdpa: change the type of nvqs to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change vdpa_device.nvqs and vhost_vdpa.nvqs to use u32 Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-3-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>

Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a5269191edda..8943a209202e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -83,7 +83,7 @@ struct vdpa_device { unsigned int index; bool features_valid; bool use_va; - int nvqs; + u32 nvqs; struct vdpa_mgmt_dev *mdev; }; @@ -338,10 +338,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, dev_struct, member)), name, use_va), \ dev_struct, member) -int vdpa_register_device(struct vdpa_device *vdev, int nvqs); +int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); void vdpa_unregister_device(struct vdpa_device *vdev); -int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); +int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** -- cgit v1.2.3 From b04d910af330b55e1d5d6eb9ecd53a375a9cf81c Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:53 +0800 Subject: vdpa: support exposing the count of vqs to userspace - GET_VQS_COUNT: the count of virtqueues that exposed Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-4-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>
Reviewed-by: Stefano Garzarella --- include/uapi/linux/vhost.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bc74e95a273a..5d99e7c242a2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,7 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- cgit v1.2.3 From f32404ae1bb9a7428a3c77419672a28895d185bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Mar 2022 22:50:23 +0100 Subject: net: move net_unlink_todo() out of the header There's no reason for this to be in netdevice.h, it's all just used in dev.c. Also make it no longer inline and let the compiler decide to do that by itself. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220325225023.f49b9056fe1c.I6b901a2df00000837a9bd251a8dd259bd23f5ded@changeid Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd7a597c55b1..59e27a2b7bf0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4601,16 +4601,6 @@ bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); -#ifdef CONFIG_LOCKDEP -static LIST_HEAD(net_unlink_list); - -static inline void net_unlink_todo(struct net_device *dev) -{ - if (list_empty(&dev->unlink_list)) - list_add_tail(&dev->unlink_list, &net_unlink_list); -} -#endif - /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->adj_list.upper, \ -- cgit v1.2.3 From 73f9b911faa74ac5107879de05c9489c419f41bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 26 Mar 2022 11:27:05 +0900 Subject: kprobes: Use rethook for kretprobe if possible Use rethook for kretprobe function return hooking if the arch sets CONFIG_HAVE_RETHOOK=y. In this case, CONFIG_KRETPROBE_ON_RETHOOK is set to 'y' automatically, and the kretprobe internal data fields switches to use rethook. If not, it continues to use kretprobe specific function return hooks. 
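The kretprobe API itself is unchanged by the switch; for reference, a minimal registration sketch (the probed symbol is a hypothetical choice) that behaves the same whether or not CONFIG_KRETPROBE_ON_RETHOOK is selected:

    #include <linux/kprobes.h>

    /* Sketch: runs when the probed function returns. */
    static int my_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
            return 0;
    }

    static struct kretprobe my_kretprobe = {
            .handler        = my_ret_handler,
            .kp.symbol_name = "kernel_clone",       /* hypothetical target */
            .maxactive      = 20,
    };

    /* register_kretprobe(&my_kretprobe) at module init,
     * unregister_kretprobe(&my_kretprobe) at module exit. */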
Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164826162556.2455864.12255833167233452047.stgit@devnote2 --- include/linux/kprobes.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 312ff997c743..157168769fc2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #ifdef CONFIG_KPROBES @@ -149,13 +150,20 @@ struct kretprobe { int maxactive; int nmissed; size_t data_size; +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + struct rethook *rh; +#else struct freelist_head freelist; struct kretprobe_holder *rph; +#endif }; #define KRETPROBE_MAX_DATA_SIZE 4096 struct kretprobe_instance { +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + struct rethook_node node; +#else union { struct freelist_node freelist; struct rcu_head rcu; @@ -164,6 +172,7 @@ struct kretprobe_instance { struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; void *fp; +#endif char data[]; }; @@ -186,10 +195,24 @@ extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); #ifdef CONFIG_KRETPROBES -extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs); +/* Check whether @p is used for implementing a trampoline. */ extern int arch_trampoline_kprobe(struct kprobe *p); +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), + "Kretprobe is accessed from instance under preemptive context"); + + return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); +} +static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) +{ + return ri->node.ret_addr; +} +#else +extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs); void arch_kretprobe_fixup_return(struct pt_regs *regs, kprobe_opcode_t *correct_ret_addr); @@ -232,6 +255,12 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance return READ_ONCE(ri->rph->rp); } +static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) +{ + return (unsigned long)ri->ret_addr; +} +#endif /* CONFIG_KRETPROBE_ON_RETHOOK */ + #else /* !CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) @@ -395,7 +424,11 @@ void unregister_kretprobe(struct kretprobe *rp); int register_kretprobes(struct kretprobe **rps, int num); void unregister_kretprobes(struct kretprobe **rps, int num); +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +#define kprobe_flush_task(tk) do {} while (0) +#else void kprobe_flush_task(struct task_struct *tk); +#endif void kprobe_free_init_mem(void); @@ -509,6 +542,19 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) #endif /* !CONFIG_OPTPROBES */ #ifdef CONFIG_KRETPROBES +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return is_rethook_trampoline(addr); +} + +static nokprobe_inline +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + return rethook_find_ret_addr(tsk, (unsigned long)fp, cur); +} +#else static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) { return (void *)addr == kretprobe_trampoline_addr(); @@ -516,6 +562,7 @@ static 
nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, struct llist_node **cur); +#endif #else static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) { -- cgit v1.2.3 From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001 From: Sungup Moon Date: Mon, 14 Mar 2022 20:05:45 +0900 Subject: nvme: allow duplicate NSIDs for private namespaces A NVMe subsystem with multiple controller can have private namespaces that use the same NSID under some conditions: "If Namespace Management, ANA Reporting, or NVM Sets are supported, the NSIDs shall be unique within the NVM subsystem. If the Namespace Management, ANA Reporting, and NVM Sets are not supported, then NSIDs: a) for shared namespace shall be unique; and b) for private namespace are not required to be unique." Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec. Make sure this specific setup is supported in Linux. Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head") Signed-off-by: Sungup Moon [hch: refactored and fixed the controller vs subsystem based naming conflict] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..2dcee34d467d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -345,6 +345,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, -- cgit v1.2.3 From 3ae8fd41573af4fb3a490c9ed947fc936ba87190 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 11 Jan 2022 16:57:50 -0600 Subject: rtc: mc146818-lib: Fix the AltCentury for AMD platforms Setting the century forward has been failing on AMD platforms. There was a previous attempt at fixing this for family 0x17 as part of commit 7ad295d5196a ("rtc: Fix the AltCentury value on AMD/Hygon platform") but this was later reverted due to some problems reported that appeared to stem from an FW bug on a family 0x17 desktop system. The same comments mentioned in the previous commit continue to apply to the newer platforms as well. ``` MC146818 driver use function mc146818_set_time() to set register RTC_FREQ_SELECT(RTC_REG_A)'s bit4-bit6 field which means divider stage reset value on Intel platform to 0x7. While AMD/Hygon RTC_REG_A(0Ah)'s bit4 is defined as DV0 [Reference]: DV0 = 0 selects Bank 0, DV0 = 1 selects Bank 1. Bit5-bit6 is defined as reserved. DV0 is set to 1, it will select Bank 1, which will disable AltCentury register(0x32) access. As UEFI pass acpi_gbl_FADT.century 0x32 (AltCentury), the CMOS write will be failed on code: CMOS_WRITE(century, acpi_gbl_FADT.century). Correct RTC_REG_A bank select bit(DV0) to 0 on AMD/Hygon CPUs, it will enable AltCentury(0x32) register writing and finally setup century as expected. ``` However in closer examination the change previously submitted was also modifying bits 5 & 6 which are declared reserved in the AMD documentation. So instead modify just the DV0 bank selection bit. Being cognizant that there was a failure reported before, split the code change out to a static function that can also be used for exclusions if any regressions such as Mikhail's pop up again. 
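As a rough sketch of the shape of that split-out helper (the function name and the exact vendor checks are illustrative, not taken from the patch):

    /* Sketch: only switch to the DV0 bank-select handling on AMD/Hygon;
     * other vendors keep writing the divider-reset value as before. */
    static bool apply_amd_register_a_behavior(void)
    {
    #ifdef CONFIG_X86
            if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
                boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
                    return true;
    #endif
            return false;
    }

            /* ...and in mc146818_set_time(): */
            if (apply_amd_register_a_behavior())
                    CMOS_WRITE(RTC_AMD_BANK_SELECT, RTC_FREQ_SELECT);
            else
                    CMOS_WRITE(RTC_DIV_RESET2, RTC_FREQ_SELECT);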
Cc: Jinke Fan Cc: Mikhail Gavrilov Link: https://lore.kernel.org/all/CABXGCsMLob0DC25JS8wwAYydnDoHBSoMh2_YLPfqm3TTvDE-Zw@mail.gmail.com/ Link: https://www.amd.com/system/files/TechDocs/51192_Bolton_FCH_RRG.pdf Signed-off-by: Raul E Rangel Signed-off-by: Mario Limonciello Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220111225750.1699-1-mario.limonciello@amd.com --- include/linux/mc146818rtc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 808bb4cee230..b0da04fe087b 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -86,6 +86,8 @@ struct cmos_rtc_board_info { /* 2 values for divider stage reset, others for "testing purposes only" */ # define RTC_DIV_RESET1 0x60 # define RTC_DIV_RESET2 0x70 + /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ +# define RTC_AMD_BANK_SELECT 0x10 /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ # define RTC_RATE_SELECT 0x0F -- cgit v1.2.3 From 7968778914e53788a01c2dee2692cab157de9ac0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 9 Mar 2022 20:50:39 +0100 Subject: PCI: Remove the deprecated "pci-dma-compat.h" API Now that all usages of the functions defined in "pci-dma-compat.h" have been removed, it is time to remove this file as well. In order not to break builds, move the "#include " that was in "pci-dma-compat.h" into "include/linux/pci.h" Signed-off-by: Christophe JAILLET Acked-by: Bjorn Helgaas Signed-off-by: Christoph Hellwig --- include/linux/pci-dma-compat.h | 129 ----------------------------------------- include/linux/pci.h | 3 +- 2 files changed, 1 insertion(+), 131 deletions(-) delete mode 100644 include/linux/pci-dma-compat.h (limited to 'include') diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h deleted file mode 100644 index 249d4d7fbf18..000000000000 --- a/include/linux/pci-dma-compat.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* include this file if the platform implements the dma_ DMA Mapping API - * and wants to provide the pci_ DMA Mapping API in terms of it */ - -#ifndef _ASM_GENERIC_PCI_DMA_COMPAT_H -#define _ASM_GENERIC_PCI_DMA_COMPAT_H - -#include - -/* This defines the direction arg to the DMA mapping routines. 
*/ -#define PCI_DMA_BIDIRECTIONAL DMA_BIDIRECTIONAL -#define PCI_DMA_TODEVICE DMA_TO_DEVICE -#define PCI_DMA_FROMDEVICE DMA_FROM_DEVICE -#define PCI_DMA_NONE DMA_NONE - -static inline void * -pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(&hwdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void * -pci_zalloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(&hwdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void -pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - dma_free_coherent(&hwdev->dev, size, vaddr, dma_handle); -} - -static inline dma_addr_t -pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - return dma_map_single(&hwdev->dev, ptr, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - dma_unmap_single(&hwdev->dev, dma_addr, size, (enum dma_data_direction)direction); -} - -static inline dma_addr_t -pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - return dma_map_page(&hwdev->dev, page, offset, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - dma_unmap_page(&hwdev->dev, dma_address, size, (enum dma_data_direction)direction); -} - -static inline int -pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - return dma_map_sg(&hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - dma_unmap_sg(&hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_cpu(&hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_device(&hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_cpu(&hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_device(&hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline int -pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) -{ - return dma_mapping_error(&pdev->dev, dma_addr); -} - -#ifdef CONFIG_PCI -static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_coherent_mask(&dev->dev, mask); -} -#else -static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ return -EIO; } -static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ return -EIO; } -#endif - -#endif diff --git a/include/linux/pci.h b/include/linux/pci.h index b957eeb89c7a..60adf42460ab 
100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2473,8 +2473,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -/* Provide the legacy pci_dma_* API */ -#include +#include #define pci_printk(level, pdev, fmt, arg...) \ dev_printk(level, &(pdev)->dev, fmt, ##arg) -- cgit v1.2.3 From bc55cfd5718c7c23e5524582e9fa70b4d10f2433 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Mar 2022 14:09:03 +0200 Subject: ALSA: pcm: Fix potential AB/BA lock with buffer_mutex and mmap_lock syzbot caught a potential deadlock between the PCM runtime->buffer_mutex and the mm->mmap_lock. It was brought by the recent fix to cover the racy read/write and other ioctls, and in that commit, I overlooked a (hopefully only) corner case that may take the revert lock, namely, the OSS mmap. The OSS mmap operation exceptionally allows to re-configure the parameters inside the OSS mmap syscall, where mm->mmap_mutex is already held. Meanwhile, the copy_from/to_user calls at read/write operations also take the mm->mmap_lock internally, hence it may lead to a AB/BA deadlock. A similar problem was already seen in the past and we fixed it with a refcount (in commit b248371628aa). The former fix covered only the call paths with OSS read/write and OSS ioctls, while we need to cover the concurrent access via both ALSA and OSS APIs now. This patch addresses the problem above by replacing the buffer_mutex lock in the read/write operations with a refcount similar as we've used for OSS. The new field, runtime->buffer_accessing, keeps the number of concurrent read/write operations. Unlike the former buffer_mutex protection, this protects only around the copy_from/to_user() calls; the other codes are basically protected by the PCM stream lock. The refcount can be a negative, meaning blocked by the ioctls. If a negative value is seen, the read/write aborts with -EBUSY. In the ioctl side, OTOH, they check this refcount, too, and set to a negative value for blocking unless it's already being accessed. Reported-by: syzbot+6e5c88838328e99c7e1c@syzkaller.appspotmail.com Fixes: dca947d4d26d ("ALSA: pcm: Fix races among concurrent read/write and buffer changes") Cc: Link: https://lore.kernel.org/r/000000000000381a0d05db622a81@google.com Link: https://lore.kernel.org/r/20220330120903.4738-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 314f2779cab5..6b99310b5b88 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -402,6 +402,7 @@ struct snd_pcm_runtime { struct fasync_struct *fasync; bool stop_operating; /* sync_stop will be called */ struct mutex buffer_mutex; /* protect for buffer changes */ + atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; -- cgit v1.2.3 From f941c51eeac7ebe0f8ec30943bf78e7f60aad039 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 29 Mar 2022 20:18:15 +0000 Subject: loop: fix ioctl calls using compat_loop_info Support for cryptoloop was deleted in commit 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer"), making the usage of loop_info->lo_encrypt_type obsolete. However, this member was also removed from the compat_loop_info definition and this breaks userspace ioctl calls for 32-bit binaries and CONFIG_COMPAT=y. 
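The breakage is purely one of binary layout: the 32-bit structure exchanged through LOOP_SET_STATUS/LOOP_GET_STATUS must keep the field so that the offsets of the following members, and the overall size, still match what compat userspace passes. A rough sketch of the compat structure with the member kept in place (the field list here is assumed to mirror the uapi struct loop_info; it is not copied from this patch):

```c
#include <linux/compat.h>
#include <uapi/linux/loop.h>	/* LO_NAME_SIZE, LO_KEY_SIZE */

struct compat_loop_info {
	compat_int_t	lo_number;		/* ioctl r/o */
	compat_dev_t	lo_device;		/* ioctl r/o */
	compat_ulong_t	lo_inode;		/* ioctl r/o */
	compat_dev_t	lo_rdevice;		/* ioctl r/o */
	compat_int_t	lo_offset;
	compat_int_t	lo_encrypt_type;	/* obsolete, ignored - but keeps the layout */
	compat_int_t	lo_encrypt_key_size;	/* ioctl w/o */
	compat_int_t	lo_flags;		/* ioctl r/o */
	char		lo_name[LO_NAME_SIZE];
	unsigned char	lo_encrypt_key[LO_KEY_SIZE];	/* ioctl w/o */
	compat_ulong_t	lo_init[2];
	char		reserved[4];
};
```

Dropping lo_encrypt_type shifts every later member by four bytes and shrinks the struct, so the kernel and 32-bit callers disagree on what they are copying.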
This patch restores the compat_loop_info->lo_encrypt_type member and marks it obsolete there as well as in the uapi header definitions. Fixes: 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer") Signed-off-by: Carlos Llamas Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220329201815.1347500-1-cmllamas@google.com Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae..98e60801195e 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* ioctl w/o */ int lo_flags; char lo_name[LO_NAME_SIZE]; @@ -61,7 +61,7 @@ struct loop_info64 { __u64 lo_offset; __u64 lo_sizelimit;/* bytes, 0 == max available */ __u32 lo_number; /* ioctl r/o */ - __u32 lo_encrypt_type; + __u32 lo_encrypt_type; /* obsolete, ignored */ __u32 lo_encrypt_key_size; /* ioctl w/o */ __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; -- cgit v1.2.3 From 7dd5ad2d3e82fb55229e3fe18e09160878e77e20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Mar 2022 10:36:55 +0200 Subject: Revert "signal, x86: Delay calling signals in atomic on RT enabled kernels" Revert commit bf9ad37dc8a. It needs to be better encapsulated and generalized. Signed-off-by: Thomas Gleixner Cc: "Eric W. Biederman" Cc: Oleg Nesterov Cc: Sebastian Andrzej Siewior --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a6fdd2a679f..d5e3c00b74e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1090,9 +1090,6 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; -#ifdef CONFIG_RT_DELAYED_SIGNALS - struct kernel_siginfo forced_info; -#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; -- cgit v1.2.3 From 4a7f62f91933c8ae5308f9127fd8ea48188b6bc3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 30 Mar 2022 15:39:16 +0100 Subject: rxrpc: Fix call timer start racing with call destruction The rxrpc_call struct has a timer used to handle various timed events relating to a call. This timer can get started from the packet input routines that are run in softirq mode with just the RCU read lock held. Unfortunately, because only the RCU read lock is held - and neither a ref nor any other lock is taken - the call can start getting destroyed at the same time a packet comes in addressed to that call. This causes the timer - which was already stopped - to get restarted. Later, the timer dispatch code may then oops if the timer got deallocated first. Fix this by trying to take a ref on the rxrpc_call struct and, if successful, passing that ref along to the timer. If the timer was already running, the ref is discarded. The timer completion routine can then pass the ref along to the call's work item when it queues it. If the timer or work item were already queued/running, the extra ref is discarded.
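The idiom being described is the familiar "only arm the timer if a reference can still be taken, and hand that reference to the timer"; a generic sketch of it (the types and helpers below are made up for illustration, this is not the rxrpc code):

```c
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct my_call {			/* stand-in for struct rxrpc_call */
	refcount_t ref;
	struct timer_list timer;
};

static void my_call_put(struct my_call *call)
{
	if (refcount_dec_and_test(&call->ref))
		kfree(call);		/* real code would defer/RCU-free */
}

static void my_call_reduce_timer(struct my_call *call, unsigned long expire_at)
{
	/* Refuse to arm the timer if the call is already being torn down. */
	if (!refcount_inc_not_zero(&call->ref))
		return;

	/*
	 * timer_reduce() returns non-zero if the timer was already pending;
	 * a pending timer already owns a ref, so drop the extra one we took.
	 */
	if (timer_reduce(&call->timer, expire_at))
		my_call_put(call);
}
```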
Fixes: a158bdd3247b ("rxrpc: Fix call timeouts") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2022-March/005073.html Link: https://lore.kernel.org/r/164865115696.2943015.11097991776647323586.stgit@warthog.procyon.org.uk Signed-off-by: Paolo Abeni --- include/trace/events/rxrpc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e70c90116eda..4a3ab0ed6e06 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -83,12 +83,15 @@ enum rxrpc_call_trace { rxrpc_call_error, rxrpc_call_got, rxrpc_call_got_kernel, + rxrpc_call_got_timer, rxrpc_call_got_userid, rxrpc_call_new_client, rxrpc_call_new_service, rxrpc_call_put, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_put_notimer, + rxrpc_call_put_timer, rxrpc_call_put_userid, rxrpc_call_queued, rxrpc_call_queued_ref, @@ -278,12 +281,15 @@ enum rxrpc_tx_point { EM(rxrpc_call_error, "*E*") \ EM(rxrpc_call_got, "GOT") \ EM(rxrpc_call_got_kernel, "Gke") \ + EM(rxrpc_call_got_timer, "GTM") \ EM(rxrpc_call_got_userid, "Gus") \ EM(rxrpc_call_new_client, "NWc") \ EM(rxrpc_call_new_service, "NWs") \ EM(rxrpc_call_put, "PUT") \ EM(rxrpc_call_put_kernel, "Pke") \ - EM(rxrpc_call_put_noqueue, "PNQ") \ + EM(rxrpc_call_put_noqueue, "PnQ") \ + EM(rxrpc_call_put_notimer, "PnT") \ + EM(rxrpc_call_put_timer, "PTM") \ EM(rxrpc_call_put_userid, "Pus") \ EM(rxrpc_call_queued, "QUE") \ EM(rxrpc_call_queued_ref, "QUR") \ -- cgit v1.2.3 From 48ec13d36d3ff716a8a08e6583a925def7a2564d Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 18 Mar 2022 12:12:33 +0000 Subject: gpio: Properly document parent data union Suppress a warning in the html docs by documenting these fields separately. Signed-off-by: Joey Gouly Link: https://lore.kernel.org/lkml/20211027220118.71a229ab@canb.auug.org.au/ Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Marc Zyngier Cc: Stephen Rothwell Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b0728c8ad90c..98c93510640e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -168,13 +168,16 @@ struct gpio_irq_chip { /** * @parent_handler_data: + * + * If @per_parent_data is false, @parent_handler_data is a single + * pointer used as the data associated with every parent interrupt. + * * @parent_handler_data_array: * - * Data associated, and passed to, the handler for the parent - * interrupt. Can either be a single pointer if @per_parent_data - * is false, or an array of @num_parents pointers otherwise. If - * @per_parent_data is true, @parent_handler_data_array cannot be - * NULL. + * If @per_parent_data is true, @parent_handler_data_array is + * an array of @num_parents pointers, and is used to associate + * different data for each parent. This cannot be NULL if + * @per_parent_data is true. 
*/ union { void *parent_handler_data; -- cgit v1.2.3 From 229a08a4f4e4f9949801cc39b6480ddc9c487183 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Mar 2022 09:37:31 -0800 Subject: ARM/dma-mapping: Remove CMA code when not built with CMA The MAX_CMA_AREAS could be set to 0, which would result in code that would attempt to operate beyond the end of a zero-sized array. If CONFIG_CMA is disabled, just remove this code entirely. Found when building arm on GCC 10.x for several defconfigs (e.g. axm55xx_defconfig) under -Warray-bounds: arch/arm/mm/dma-mapping.c:396:22: warning: array subscript is outside array bounds of 'struct dma_contig_early_reserve[0]' [-Warray-bounds] 396 | dma_mmu_remap[dma_mmu_remap_num].size = size; | ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~ arch/arm/mm/dma-mapping.c:389:40: note: while referencing 'dma_mmu_remap' 389 | static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; | ^~~~~~~~~~~~~ Cc: Russell King Cc: Logan Gunthorpe Cc: Martin Oliveira Cc: David Hildenbrand Cc: Andrew Morton Cc: Stephen Rothwell Cc: Zi Yan Cc: Hari Bathini Cc: Minchan Kim Cc: Mike Kravetz Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/all/6243ee60.1c69fb81.16de6.7dbf@mx.google.com/ Signed-off-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/lkml/20220310070041.GA24874@lst.de Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/lkml/9059fa71-330f-f04f-b155-2850abb72a71@redhat.com --- include/linux/cma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index bd801023504b..2c2ede7f0724 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -12,10 +12,6 @@ */ #ifdef CONFIG_CMA_AREAS #define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) - -#else -#define MAX_CMA_AREAS (0) - #endif #define CMA_MAX_NAME 64 -- cgit v1.2.3 From aad5b23ebf21573a32b6f07644f028d64492a5d6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 28 Mar 2022 12:34:31 -0400 Subject: dm: fix dm_io and dm_target_io flags race condition on Alpha Early alpha processors cannot write a single byte or short; they read 8 bytes, modify the value in registers and write back 8 bytes. This could cause race condition in the structure dm_io - if the fields flags and io_count are modified simultaneously. Fix this bug by using 32-bit flags if we are on Alpha and if we are compiling for a processor that doesn't have the byte-word-extension. Signed-off-by: Mikulas Patocka Fixes: bd4a6dd241ae ("dm: reduce size of dm_io and dm_target_io structs") [snitzer: Jens allowed this change since Mikulas owns a relevant Alpha!] Acked-by: Jens Axboe Signed-off-by: Mike Snitzer --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dd0763a1c674..1973ef9bd40f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -85,8 +85,10 @@ struct block_device { */ #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) typedef u32 __bitwise blk_status_t; +typedef u32 blk_short_t; #else typedef u8 __bitwise blk_status_t; +typedef u16 blk_short_t; #endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) -- cgit v1.2.3 From ebf921a9fac38560e0fc3a4381e163a6969efd5a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 22 Jan 2022 15:46:22 -0500 Subject: readahead: Remove read_cache_pages() With no remaining users, remove this function and the related infrastructure. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a8d0b327b066..993994cd943a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -752,8 +752,6 @@ struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern int read_cache_pages(struct address_space *mapping, - struct list_head *pages, filler_t *filler, void *data); static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, struct file *file) -- cgit v1.2.3 From 704528d895dd3e7b173e672116b4eb2b0a0fceb0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 23 Mar 2022 21:29:04 -0400 Subject: fs: Remove ->readpages address space operation All filesystems have now been converted to use ->readahead, so remove the ->readpages operation and fix all the comments that used to refer to it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/fs.h | 6 ------ include/linux/fsverity.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 183160872133..7c81887cc7e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -370,12 +370,6 @@ struct address_space_operations { /* Mark a folio dirty. Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); - /* - * Reads in the requested pages. Unlike ->readpage(), this is - * PURELY used for read-ahead!. - */ - int (*readpages)(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index b568b3c7d095..a7afc800bd8d 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -221,7 +221,7 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) * * This checks whether ->i_verity_info has been set. * - * Filesystems call this from ->readpages() to check whether the pages need to + * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) -- cgit v1.2.3 From a9fcd89d67bb8c4ad613b54ab691fc603c94a03a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Feb 2022 09:13:43 -0500 Subject: fs: Remove read_actor_t This typedef is not used any more. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c81887cc7e8..7588d3a0ced8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -357,9 +357,6 @@ typedef struct { int error; } read_descriptor_t; -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, - unsigned long, unsigned long); - struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); -- cgit v1.2.3 From b2403a61308533c576c9dd783fcb73a9186e0b37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Feb 2022 09:15:34 -0500 Subject: fs, net: Move read_descriptor_t to net.h fs.h has no more need for this typedef; networking is now the sole user of the read_descriptor_t. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/fs.h | 19 ------------------- include/linux/net.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7588d3a0ced8..8ff28939de60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -338,25 +338,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user *buf; - void *data; - } arg; - int error; -} read_descriptor_t; - struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); diff --git a/include/linux/net.h b/include/linux/net.h index ba736b457a06..12093f4db50c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -125,6 +125,25 @@ struct socket { struct socket_wq wq; }; +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + struct vm_area_struct; struct page; struct sockaddr; -- cgit v1.2.3 From 800ba29547e16d5fbe67ca764ba660e049e9f1bf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 19 Feb 2022 23:19:49 -0500 Subject: fs: Pass an iocb to generic_perform_write() We can extract both the file pointer and the pos from the iocb. This simplifies each caller as well as allowing generic_perform_write() to see more of the iocb contents in the future. 
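For callers the conversion is mechanical, since struct kiocb already carries both values as ki_filp and ki_pos; a sketch of a hypothetical ->write_iter user (not taken from any particular filesystem, and omitting the usual locking and generic_write_checks()):

```c
#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t foo_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t written;

	/* Previously: generic_perform_write(iocb->ki_filp, from, iocb->ki_pos); */
	written = generic_perform_write(iocb, from);
	if (written > 0)
		iocb->ki_pos += written;	/* the caller still advances the position */
	return written;
}
```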
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ff28939de60..468dc7ec821f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2999,7 +2999,7 @@ extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); -extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); +ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); -- cgit v1.2.3 From d7414ba14a3a67f81321069219dc7dbc095022c3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 20 Feb 2022 22:28:03 -0500 Subject: filemap: Remove AOP_FLAG_CONT_EXPAND This flag is no longer used, so remove it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Al Viro Acked-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 468dc7ec821f..bbde95387a23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -275,7 +275,6 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; -#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */ #define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct * helper code (eg buffer layer) * to clear GFP_FS from alloc */ -- cgit v1.2.3 From ada543af3bfe3d953986eca118601b9612382c13 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 1 Apr 2022 11:28:45 -0700 Subject: mm, kasan: fix __GFP_BITS_SHIFT definition breaking LOCKDEP KASAN changes that added new GFP flags mistakenly updated __GFP_BITS_SHIFT as the total number of GFP bits instead of as a shift used to define __GFP_BITS_MASK. This broke LOCKDEP, as __GFP_BITS_MASK now gets the 25th bit enabled instead of the 28th for __GFP_NOLOCKDEP. Update __GFP_BITS_SHIFT to always count KASAN GFP bits. In the future, we could handle all combinations of KASAN and LOCKDEP to occupy as few bits as possible. For now, we have enough GFP bits to be inefficient in this quick fix. 
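A worked example of the breakage, assuming ___GFP_NOLOCKDEP sits at bit 27 and the three KASAN bits at 24-26, which is what the new definition implies:

```c
/*
 * With CONFIG_KASAN_HW_TAGS=n and CONFIG_LOCKDEP=y:
 *
 *   old: __GFP_BITS_SHIFT = 24 + 3 * 0 + 1 = 25
 *        __GFP_BITS_MASK  = (1 << 25) - 1    -> covers bits 0..24 only
 *        __GFP_NOLOCKDEP  = 1 << 27          -> outside the mask, silently stripped
 *
 *   new: __GFP_BITS_SHIFT = 27 + 1 = 28
 *        __GFP_BITS_MASK  = (1 << 28) - 1    -> covers bits 0..27, so the
 *                                               NOLOCKDEP bit survives again
 */
```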
Link: https://lkml.kernel.org/r/462ff52742a1fcc95a69778685737f723ee4dfb3.1648400273.git.andreyknvl@google.com Fixes: 9353ffa6e9e9 ("kasan, page_alloc: allow skipping memory init for HW_TAGS") Fixes: 53ae233c30a6 ("kasan, page_alloc: allow skipping unpoisoning for HW_TAGS") Fixes: f49d9c5bb15c ("kasan, mm: only define ___GFP_SKIP_KASAN_POISON with HW_TAGS") Signed-off-by: Andrey Konovalov Reported-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Acked-by: Vlastimil Babka Cc: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0fa17fb85de5..761f8f1885c7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -264,9 +264,7 @@ struct vm_area_struct; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (24 + \ - 3 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ - IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** -- cgit v1.2.3 From df06dae3f2a8bfb83683abf88d3dcde23fc8093d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Feb 2022 16:53:02 +0000 Subject: KVM: Don't actually set a request when evicting vCPUs for GFN cache invd Don't actually set a request bit in vcpu->requests when making a request purely to force a vCPU to exit the guest. Logging a request but not actually consuming it would cause the vCPU to get stuck in an infinite loop during KVM_RUN because KVM would see the pending request and bail from VM-Enter to service the request. Note, it's currently impossible for KVM to set KVM_REQ_GPC_INVALIDATE as nothing in KVM is wired up to set guest_uses_pa=true. But, it'd be all too easy for arch code to introduce use of kvm_gfn_to_pfn_cache_init() without implementing handling of the request, especially since getting test coverage of MMU notifier interaction with specific KVM features usually requires a directed test. Opportunistically rename gfn_to_pfn_cache_invalidate_start()'s wake_vcpus to evict_vcpus. The purpose of the request is to get vCPUs out of guest mode, it's supposed to _avoid_ waking vCPUs that are blocking. Opportunistically rename KVM_REQ_GPC_INVALIDATE to be more specific as to what it wants to accomplish, and to genericize the name so that it can used for similar but unrelated scenarios, should they arise in the future. Add a comment and documentation to explain why the "no action" request exists. Add compile-time assertions to help detect improper usage. Use the inner assertless helper in the one s390 path that makes requests without a hardcoded request. 
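To make the intended usage concrete, a sketch of how an invalidation path can force vCPUs out of guest mode with the new request (the wrapper below is hypothetical; the real caller is the gfn_to_pfn_cache invalidation code):

```c
#include <linux/kvm_host.h>

/* Hypothetical helper: kick every vCPU out to OUTSIDE_GUEST_MODE and wait. */
static void evict_vcpus(struct kvm *kvm)
{
	/*
	 * KVM_REQUEST_NO_ACTION means no bit is logged in vcpu->requests, so
	 * nothing ever has to "handle" the request; KVM_REQUEST_WAIT makes
	 * the sender wait until the targeted vCPUs have actually exited.
	 */
	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
}
```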
Cc: David Woodhouse Signed-off-by: Sean Christopherson Message-Id: <20220223165302.3205276-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9536ffa0473b..678fd7914521 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -148,6 +148,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) +#define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. @@ -156,9 +157,18 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 -#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 +/* + * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to + * OUTSIDE_GUEST_MODE. KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick" + * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing + * on. A kick only guarantees that the vCPU is on its way out, e.g. a previous + * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no + * guarantee the vCPU received an IPI and has actually exited guest mode. + */ +#define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) + #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ @@ -1222,7 +1232,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * @vcpu: vCPU to be used for marking pages dirty and to be woken on * invalidation. * @guest_uses_pa: indicates that the resulting host physical PFN is used while - * @vcpu is IN_GUEST_MODE so invalidations should wake it. + * @vcpu is IN_GUEST_MODE; invalidations of the cache from MMU + * notifiers (but not for KVM memslot changes!) will also force + * @vcpu to exit the guest to refresh the cache. * @kernel_map: requests a kernel virtual mapping (kmap / memremap). * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. @@ -1233,10 +1245,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @kvm's list for - * invalidations to be processed. Invalidation callbacks to @vcpu using - * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM - * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check() - * to ensure that the cache is valid before accessing the target page. + * invalidations to be processed. Callers are required to use + * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before + * accessing the target page. 
*/ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, struct kvm_vcpu *vcpu, bool guest_uses_pa, @@ -1984,7 +1995,7 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) void kvm_arch_irq_routing_update(struct kvm *kvm); -static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) +static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Ensure the rest of the request is published to kvm_check_request's @@ -1994,6 +2005,19 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } +static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) +{ + /* + * Request that don't require vCPU action should never be logged in + * vcpu->requests. The vCPU won't clear the request, so it will stay + * logged indefinitely and prevent the vCPU from entering the guest. + */ + BUILD_BUG_ON(!__builtin_constant_p(req) || + (req & KVM_REQUEST_NO_ACTION)); + + __kvm_make_request(req, vcpu); +} + static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); -- cgit v1.2.3 From d0d96121d03d6d9cf608d948247a9f24f5a02da9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 3 Mar 2022 15:41:11 +0000 Subject: KVM: Use enum to track if cached PFN will be used in guest and/or host Replace the guest_uses_pa and kernel_map booleans in the PFN cache code with a unified enum/bitmask. Using explicit names makes it easier to review and audit call sites. Opportunistically add a WARN to prevent passing garbage; instantating a cache without declaring its usage is either buggy or pointless. Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ++++++++-------- include/linux/kvm_types.h | 10 ++++++++-- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 678fd7914521..be9bbc0c6200 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1231,11 +1231,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * @gpc: struct gfn_to_pfn_cache object. * @vcpu: vCPU to be used for marking pages dirty and to be woken on * invalidation. - * @guest_uses_pa: indicates that the resulting host physical PFN is used while - * @vcpu is IN_GUEST_MODE; invalidations of the cache from MMU - * notifiers (but not for KVM memslot changes!) will also force - * @vcpu to exit the guest to refresh the cache. - * @kernel_map: requests a kernel virtual mapping (kmap / memremap). + * @usage: indicates if the resulting host physical PFN is used while + * the @vcpu is IN_GUEST_MODE (in which case invalidation of + * the cache from MMU notifiers---but not for KVM memslot + * changes!---will also force @vcpu to exit the guest and + * refresh the cache); and/or if the PFN used directly + * by KVM (and thus needs a kernel virtual mapping). * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. * @dirty: mark the cache dirty immediately. @@ -1250,9 +1251,8 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * accessing the target page. 
*/ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, bool guest_uses_pa, - bool kernel_map, gpa_t gpa, unsigned long len, - bool dirty); + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, + gpa_t gpa, unsigned long len, bool dirty); /** * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index dceac12c1ce5..784f37cbf33e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -18,6 +18,7 @@ struct kvm_memslots; enum kvm_mr_change; +#include #include #include @@ -46,6 +47,12 @@ typedef u64 hfn_t; typedef hfn_t kvm_pfn_t; +enum pfn_cache_usage { + KVM_GUEST_USES_PFN = BIT(0), + KVM_HOST_USES_PFN = BIT(1), + KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN, +}; + struct gfn_to_hva_cache { u64 generation; gpa_t gpa; @@ -64,11 +71,10 @@ struct gfn_to_pfn_cache { rwlock_t lock; void *khva; kvm_pfn_t pfn; + enum pfn_cache_usage usage; bool active; bool valid; bool dirty; - bool kernel_map; - bool guest_uses_pa; }; #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE -- cgit v1.2.3 From cf1d88b36ba7e83bdaa50bccc4c47864e8f08cbe Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:12 +0000 Subject: KVM: Remove dirty handling from gfn_to_pfn_cache completely It isn't OK to cache the dirty status of a page in internal structures for an indefinite period of time. Any time a vCPU exits the run loop to userspace might be its last; the VMM might do its final check of the dirty log, flush the last remaining dirty pages to the destination and complete a live migration. If we have internal 'dirty' state which doesn't get flushed until the vCPU is finally destroyed on the source after migration is complete, then we have lost data because that will escape the final copy. This problem already exists with the use of kvm_vcpu_unmap() to mark pages dirty in e.g. VMX nesting. Note that the actual Linux MM already considers the page to be dirty since we have a writeable mapping of it. This is just about the KVM dirty logging. For the nesting-style use cases (KVM_GUEST_USES_PFN) we will need to track which gfn_to_pfn_caches have been used and explicitly mark the corresponding pages dirty before returning to userspace. But we would have needed external tracking of that anyway, rather than walking the full list of GPCs to find those belonging to this vCPU which are dirty. So let's rely *solely* on that external tracking, and keep it simple rather than laying a tempting trap for callers to fall into. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 +++++--------- include/linux/kvm_types.h | 1 - 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index be9bbc0c6200..3f9b22c4983a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1239,7 +1239,6 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * by KVM (and thus needs a kernel virtual mapping). * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. 
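With dirtiness no longer tracked by the cache, a KVM_HOST_USES_PFN user is expected to mark the page dirty itself every time it writes through the mapping; a sketch of that pattern (the function name and surrounding logic are assumptions, not taken from this series):

```c
#include <linux/kvm_host.h>
#include <linux/string.h>

static void write_guest_shared_page(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				    const void *src, unsigned long len)
{
	unsigned long flags;

	read_lock_irqsave(&gpc->lock, flags);
	if (kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, len)) {
		memcpy(gpc->khva, src, len);
		/* Dirty logging is now the caller's job, done while the write
		 * can still be seen by the final dirty-log pass. */
		mark_page_dirty(kvm, gpa_to_gfn(gpc->gpa));
	}
	read_unlock_irqrestore(&gpc->lock, flags);
	/* A real caller would refresh the cache and retry if the check failed. */
}
```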
@@ -1252,7 +1251,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); */ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, - gpa_t gpa, unsigned long len, bool dirty); + gpa_t gpa, unsigned long len); /** * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. @@ -1261,7 +1260,6 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @gpc: struct gfn_to_pfn_cache object. * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: %true if the cache is still valid and the address matches. * %false if the cache is not valid. @@ -1283,7 +1281,6 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @gpc: struct gfn_to_pfn_cache object. * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * @dirty: mark the cache dirty immediately. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. @@ -1296,7 +1293,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * with the lock still held to permit access. */ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len, bool dirty); + gpa_t gpa, unsigned long len); /** * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. @@ -1304,10 +1301,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * - * This unmaps the referenced page and marks it dirty, if appropriate. The - * cache is left in the invalid state but at least the mapping from GPA to - * userspace HVA will remain cached and can be reused on a subsequent - * refresh. + * This unmaps the referenced page. The cache is left in the invalid state + * but at least the mapping from GPA to userspace HVA will remain cached + * and can be reused on a subsequent refresh. */ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 784f37cbf33e..ac1ebb37a0ff 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -74,7 +74,6 @@ struct gfn_to_pfn_cache { enum pfn_cache_usage usage; bool active; bool valid; - bool dirty; }; #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE -- cgit v1.2.3 From 84055411d861d3a2b485ad629fce7d2179d72c1e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 29 Mar 2022 16:50:44 -0400 Subject: tracing: Rename the staging files for trace_events When looking for implementation of different phases of the creation of the TRACE_EVENT() macro, it is pretty useless when all helper macro redefinitions are in files labeled "stageX_defines.h". Rename them to state which phase the files are for. For instance, when looking for the defines that are used to create the event fields, seeing "stage4_event_fields.h" gives the developer a good idea that the defines are in that file. 
Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage1_defines.h | 51 ---------- include/trace/stages/stage1_struct_define.h | 51 ++++++++++ include/trace/stages/stage2_data_offsets.h | 54 +++++++++++ include/trace/stages/stage2_defines.h | 54 ----------- include/trace/stages/stage3_defines.h | 135 --------------------------- include/trace/stages/stage3_trace_output.h | 135 +++++++++++++++++++++++++++ include/trace/stages/stage4_defines.h | 63 ------------- include/trace/stages/stage4_event_fields.h | 63 +++++++++++++ include/trace/stages/stage5_defines.h | 89 ------------------ include/trace/stages/stage5_get_offsets.h | 89 ++++++++++++++++++ include/trace/stages/stage6_defines.h | 106 --------------------- include/trace/stages/stage6_event_callback.h | 106 +++++++++++++++++++++ include/trace/stages/stage7_class_define.h | 36 +++++++ include/trace/stages/stage7_defines.h | 36 ------- include/trace/trace_custom_events.h | 14 +-- include/trace/trace_events.h | 14 +-- 16 files changed, 548 insertions(+), 548 deletions(-) delete mode 100644 include/trace/stages/stage1_defines.h create mode 100644 include/trace/stages/stage1_struct_define.h create mode 100644 include/trace/stages/stage2_data_offsets.h delete mode 100644 include/trace/stages/stage2_defines.h delete mode 100644 include/trace/stages/stage3_defines.h create mode 100644 include/trace/stages/stage3_trace_output.h delete mode 100644 include/trace/stages/stage4_defines.h create mode 100644 include/trace/stages/stage4_event_fields.h delete mode 100644 include/trace/stages/stage5_defines.h create mode 100644 include/trace/stages/stage5_get_offsets.h delete mode 100644 include/trace/stages/stage6_defines.h create mode 100644 include/trace/stages/stage6_event_callback.h create mode 100644 include/trace/stages/stage7_class_define.h delete mode 100644 include/trace/stages/stage7_defines.h (limited to 'include') diff --git a/include/trace/stages/stage1_defines.h b/include/trace/stages/stage1_defines.h deleted file mode 100644 index a16783419687..000000000000 --- a/include/trace/stages/stage1_defines.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 1 definitions for creating trace events */ - -#undef __field -#define __field(type, item) type item; - -#undef __field_ext -#define __field_ext(type, item, filter_type) type item; - -#undef __field_struct -#define __field_struct(type, item) type item; - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) type item; - -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) u32 __data_loc_##item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) - -#undef __sockaddr -#define __sockaddr(field, len) __dynamic_array(u8, field, len) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) - -#undef __rel_sockaddr -#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) - -#undef TP_STRUCT__entry 
-#define TP_STRUCT__entry(args...) args diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h new file mode 100644 index 000000000000..a16783419687 --- /dev/null +++ b/include/trace/stages/stage1_struct_define.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 1 definitions for creating trace events */ + +#undef __field +#define __field(type, item) type item; + +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + +#undef __field_struct +#define __field_struct(type, item) type item; + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) type item; + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) u32 __data_loc_##item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) + +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h new file mode 100644 index 000000000000..42fd1e8813ec --- /dev/null +++ b/include/trace/stages/stage2_data_offsets.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 2 definitions for creating trace events */ + +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) + +#undef TRACE_DEFINE_SIZEOF +#define TRACE_DEFINE_SIZEOF(a) + +#undef __field +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) u32 item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage2_defines.h b/include/trace/stages/stage2_defines.h deleted file mode 100644 index 
42fd1e8813ec..000000000000 --- a/include/trace/stages/stage2_defines.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 2 definitions for creating trace events */ - -#undef TRACE_DEFINE_ENUM -#define TRACE_DEFINE_ENUM(a) - -#undef TRACE_DEFINE_SIZEOF -#define TRACE_DEFINE_SIZEOF(a) - -#undef __field -#define __field(type, item) - -#undef __field_ext -#define __field_ext(type, item, filter_type) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) u32 item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -#undef __sockaddr -#define __sockaddr(field, len) __dynamic_array(u8, field, len) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - -#undef __rel_sockaddr -#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage3_defines.h b/include/trace/stages/stage3_defines.h deleted file mode 100644 index e3b183e9d18e..000000000000 --- a/include/trace/stages/stage3_defines.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 3 definitions for creating trace events */ - -#undef __entry -#define __entry field - -#undef TP_printk -#define TP_printk(fmt, args...) 
fmt "\n", args - -#undef __get_dynamic_array -#define __get_dynamic_array(field) \ - ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) - -#undef __get_dynamic_array_len -#define __get_dynamic_array_len(field) \ - ((__entry->__data_loc_##field >> 16) & 0xffff) - -#undef __get_str -#define __get_str(field) ((char *)__get_dynamic_array(field)) - -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)__entry + \ - offsetof(typeof(*__entry), __rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_bitmask -#define __get_bitmask(field) \ - ({ \ - void *__bitmask = __get_dynamic_array(field); \ - unsigned int __bitmask_size; \ - __bitmask_size = __get_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) \ - ({ \ - void *__bitmask = __get_rel_dynamic_array(field); \ - unsigned int __bitmask_size; \ - __bitmask_size = __get_rel_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - -#undef __get_sockaddr -#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) - -#undef __get_rel_sockaddr -#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) - -#undef __print_flags -#define __print_flags(flag, delim, flag_array...) \ - ({ \ - static const struct trace_print_flags __flags[] = \ - { flag_array, { -1, NULL }}; \ - trace_print_flags_seq(p, delim, flag, __flags); \ - }) - -#undef __print_symbolic -#define __print_symbolic(value, symbol_array...) \ - ({ \ - static const struct trace_print_flags symbols[] = \ - { symbol_array, { -1, NULL }}; \ - trace_print_symbols_seq(p, value, symbols); \ - }) - -#undef __print_flags_u64 -#undef __print_symbolic_u64 -#if BITS_PER_LONG == 32 -#define __print_flags_u64(flag, delim, flag_array...) \ - ({ \ - static const struct trace_print_flags_u64 __flags[] = \ - { flag_array, { -1, NULL } }; \ - trace_print_flags_seq_u64(p, delim, flag, __flags); \ - }) - -#define __print_symbolic_u64(value, symbol_array...) \ - ({ \ - static const struct trace_print_flags_u64 symbols[] = \ - { symbol_array, { -1, NULL } }; \ - trace_print_symbols_seq_u64(p, value, symbols); \ - }) -#else -#define __print_flags_u64(flag, delim, flag_array...) \ - __print_flags(flag, delim, flag_array) - -#define __print_symbolic_u64(value, symbol_array...) 
\ - __print_symbolic(value, symbol_array) -#endif - -#undef __print_hex -#define __print_hex(buf, buf_len) \ - trace_print_hex_seq(p, buf, buf_len, false) - -#undef __print_hex_str -#define __print_hex_str(buf, buf_len) \ - trace_print_hex_seq(p, buf, buf_len, true) - -#undef __print_array -#define __print_array(array, count, el_size) \ - ({ \ - BUILD_BUG_ON(el_size != 1 && el_size != 2 && \ - el_size != 4 && el_size != 8); \ - trace_print_array_seq(p, array, count, el_size); \ - }) - -#undef __print_hex_dump -#define __print_hex_dump(prefix_str, prefix_type, \ - rowsize, groupsize, buf, len, ascii) \ - trace_print_hex_dump_seq(p, prefix_str, prefix_type, \ - rowsize, groupsize, buf, len, ascii) - -#undef __print_ns_to_secs -#define __print_ns_to_secs(value) \ - ({ \ - u64 ____val = (u64)(value); \ - do_div(____val, NSEC_PER_SEC); \ - ____val; \ - }) - -#undef __print_ns_without_secs -#define __print_ns_without_secs(value) \ - ({ \ - u64 ____val = (u64)(value); \ - (u32) do_div(____val, NSEC_PER_SEC); \ - }) diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h new file mode 100644 index 000000000000..e3b183e9d18e --- /dev/null +++ b/include/trace/stages/stage3_trace_output.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 3 definitions for creating trace events */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_dynamic_array_len +#define __get_dynamic_array_len(field) \ + ((__entry->__data_loc_##field >> 16) & 0xffff) + +#undef __get_str +#define __get_str(field) ((char *)__get_dynamic_array(field)) + +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_bitmask +#define __get_bitmask(field) \ + ({ \ + void *__bitmask = __get_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) \ + ({ \ + void *__bitmask = __get_rel_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_rel_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags __flags[] = \ + { flag_array, { -1, NULL }}; \ + trace_print_flags_seq(p, delim, flag, __flags); \ + }) + +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) 
\ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + trace_print_symbols_seq(p, value, symbols); \ + }) + +#undef __print_flags_u64 +#undef __print_symbolic_u64 +#if BITS_PER_LONG == 32 +#define __print_flags_u64(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags_u64 __flags[] = \ + { flag_array, { -1, NULL } }; \ + trace_print_flags_seq_u64(p, delim, flag, __flags); \ + }) + +#define __print_symbolic_u64(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags_u64 symbols[] = \ + { symbol_array, { -1, NULL } }; \ + trace_print_symbols_seq_u64(p, value, symbols); \ + }) +#else +#define __print_flags_u64(flag, delim, flag_array...) \ + __print_flags(flag, delim, flag_array) + +#define __print_symbolic_u64(value, symbol_array...) \ + __print_symbolic(value, symbol_array) +#endif + +#undef __print_hex +#define __print_hex(buf, buf_len) \ + trace_print_hex_seq(p, buf, buf_len, false) + +#undef __print_hex_str +#define __print_hex_str(buf, buf_len) \ + trace_print_hex_seq(p, buf, buf_len, true) + +#undef __print_array +#define __print_array(array, count, el_size) \ + ({ \ + BUILD_BUG_ON(el_size != 1 && el_size != 2 && \ + el_size != 4 && el_size != 8); \ + trace_print_array_seq(p, array, count, el_size); \ + }) + +#undef __print_hex_dump +#define __print_hex_dump(prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii) \ + trace_print_hex_dump_seq(p, prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii) + +#undef __print_ns_to_secs +#define __print_ns_to_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + do_div(____val, NSEC_PER_SEC); \ + ____val; \ + }) + +#undef __print_ns_without_secs +#define __print_ns_without_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + (u32) do_div(____val, NSEC_PER_SEC); \ + }) diff --git a/include/trace/stages/stage4_defines.h b/include/trace/stages/stage4_defines.h deleted file mode 100644 index e80cdc397a43..000000000000 --- a/include/trace/stages/stage4_defines.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 4 definitions for creating trace events */ - -#undef __field_ext -#define __field_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ - .is_signed = is_signed_type(_type), .filter_type = _filter_type }, - -#undef __field_struct_ext -#define __field_struct_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ - 0, .filter_type = _filter_type }, - -#undef __field -#define __field(type, item) __field_ext(type, item, FILTER_OTHER) - -#undef __field_struct -#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) - -#undef __array -#define __array(_type, _item, _len) { \ - .type = #_type"["__stringify(_len)"]", .name = #_item, \ - .size = sizeof(_type[_len]), .align = __alignof__(_type), \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __dynamic_array -#define __dynamic_array(_type, _item, _len) { \ - .type = "__data_loc " #_type "[]", .name = #_item, \ - .size = 4, .align = 4, \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, 
-1) - -#undef __sockaddr -#define __sockaddr(field, len) __dynamic_array(u8, field, len) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(_type, _item, _len) { \ - .type = "__rel_loc " #_type "[]", .name = #_item, \ - .size = 4, .align = 4, \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - -#undef __rel_sockaddr -#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h new file mode 100644 index 000000000000..e80cdc397a43 --- /dev/null +++ b/include/trace/stages/stage4_event_fields.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 4 definitions for creating trace events */ + +#undef __field_ext +#define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = _filter_type }, + +#undef __field_struct_ext +#define __field_struct_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = __alignof__(_type), \ + 0, .filter_type = _filter_type }, + +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + +#undef __field_struct +#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) + +#undef __array +#define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __dynamic_array +#define __dynamic_array(_type, _item, _len) { \ + .type = "__data_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(_type, _item, _len) { \ + .type = "__rel_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage5_defines.h b/include/trace/stages/stage5_defines.h deleted file mode 100644 index 7ee5931300e6..000000000000 --- a/include/trace/stages/stage5_defines.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 5 definitions for creating trace events */ - -/* - * remember the offset of each array from the beginning of the event. 
- */ - -#undef __entry -#define __entry entry - -#undef __field -#define __field(type, item) - -#undef __field_ext -#define __field_ext(type, item, filter_type) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - __item_length = (len) * sizeof(type); \ - __data_offsets->item = __data_size + \ - offsetof(typeof(*entry), __data); \ - __data_offsets->item |= __item_length << 16; \ - __data_size += __item_length; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __item_length = (len) * sizeof(type); \ - __data_offsets->item = __data_size + \ - offsetof(typeof(*entry), __data) - \ - offsetof(typeof(*entry), __rel_loc_##item) - \ - sizeof(u32); \ - __data_offsets->item |= __item_length << 16; \ - __data_size += __item_length; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) -/* - * __bitmask_size_in_bytes_raw is the number of bytes needed to hold - * num_possible_cpus(). - */ -#define __bitmask_size_in_bytes_raw(nr_bits) \ - (((nr_bits) + 7) / 8) - -#define __bitmask_size_in_longs(nr_bits) \ - ((__bitmask_size_in_bytes_raw(nr_bits) + \ - ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8)) - -/* - * __bitmask_size_in_bytes is the number of bytes needed to hold - * num_possible_cpus() padded out to the nearest long. This is what - * is saved in the buffer, just to be consistent. - */ -#define __bitmask_size_in_bytes(nr_bits) \ - (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8)) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) - -#undef __sockaddr -#define __sockaddr(field, len) __dynamic_array(u8, field, len) - -#undef __rel_sockaddr -#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h new file mode 100644 index 000000000000..7ee5931300e6 --- /dev/null +++ b/include/trace/stages/stage5_get_offsets.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 5 definitions for creating trace events */ + +/* + * remember the offset of each array from the beginning of the event. 
+ */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data) - \ + offsetof(typeof(*entry), __rel_loc_##item) - \ + sizeof(u32); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) +/* + * __bitmask_size_in_bytes_raw is the number of bytes needed to hold + * num_possible_cpus(). + */ +#define __bitmask_size_in_bytes_raw(nr_bits) \ + (((nr_bits) + 7) / 8) + +#define __bitmask_size_in_longs(nr_bits) \ + ((__bitmask_size_in_bytes_raw(nr_bits) + \ + ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8)) + +/* + * __bitmask_size_in_bytes is the number of bytes needed to hold + * num_possible_cpus() padded out to the nearest long. This is what + * is saved in the buffer, just to be consistent. + */ +#define __bitmask_size_in_bytes(nr_bits) \ + (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8)) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage6_defines.h b/include/trace/stages/stage6_defines.h deleted file mode 100644 index e1724f73594b..000000000000 --- a/include/trace/stages/stage6_defines.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 6 definitions for creating trace events */ - -#undef __entry -#define __entry entry - -#undef __field -#define __field(type, item) - -#undef __field_struct -#define __field_struct(type, item) - -#undef __array -#define __array(type, item, len) - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - __entry->__data_loc_##item = __data_offsets.item; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) - -#undef __assign_str -#define __assign_str(dst, src) \ - strcpy(__get_str(dst), (src) ? 
(const char *)(src) : "(null)"); - -#undef __assign_str_len -#define __assign_str_len(dst, src, len) \ - do { \ - memcpy(__get_str(dst), (src), (len)); \ - __get_str(dst)[len] = '\0'; \ - } while(0) - -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -#undef __get_bitmask -#define __get_bitmask(field) (char *)__get_dynamic_array(field) - -#undef __assign_bitmask -#define __assign_bitmask(dst, src, nr_bits) \ - memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - -#undef __sockaddr -#define __sockaddr(field, len) __dynamic_array(u8, field, len) - -#undef __get_sockaddr -#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) - -#undef __assign_sockaddr -#define __assign_sockaddr(dest, src, len) \ - memcpy(__get_dynamic_array(dest), src, len) - -#undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) \ - __entry->__rel_loc_##item = __data_offsets.item; - -#undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) - -#undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) - -#undef __assign_rel_str -#define __assign_rel_str(dst, src) \ - strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); - -#undef __assign_rel_str_len -#define __assign_rel_str_len(dst, src, len) \ - do { \ - memcpy(__get_rel_str(dst), (src), (len)); \ - __get_rel_str(dst)[len] = '\0'; \ - } while (0) - -#undef __rel_bitmask -#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - -#undef __assign_rel_bitmask -#define __assign_rel_bitmask(dst, src, nr_bits) \ - memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - -#undef __rel_sockaddr -#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) - -#undef __get_rel_sockaddr -#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) - -#undef __assign_rel_sockaddr -#define __assign_rel_sockaddr(dest, src, len) \ - memcpy(__get_rel_dynamic_array(dest), src, len) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef __perf_count -#define __perf_count(c) (c) - -#undef __perf_task -#define __perf_task(t) (t) diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h new file mode 100644 index 000000000000..e1724f73594b --- /dev/null +++ b/include/trace/stages/stage6_event_callback.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 6 definitions for creating trace events */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __field_struct +#define __field_struct(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + +#undef __assign_str +#define __assign_str(dst, src) \ + strcpy(__get_str(dst), (src) ? 
(const char *)(src) : "(null)"); + +#undef __assign_str_len +#define __assign_str_len(dst, src, len) \ + do { \ + memcpy(__get_str(dst), (src), (len)); \ + __get_str(dst)[len] = '\0'; \ + } while(0) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + +#undef __assign_bitmask +#define __assign_bitmask(dst, src, nr_bits) \ + memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#undef __assign_sockaddr +#define __assign_sockaddr(dest, src, len) \ + memcpy(__get_dynamic_array(dest), src, len) + +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __entry->__rel_loc_##item = __data_offsets.item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __assign_rel_str +#define __assign_rel_str(dst, src) \ + strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + +#undef __assign_rel_str_len +#define __assign_rel_str_len(dst, src, len) \ + do { \ + memcpy(__get_rel_str(dst), (src), (len)); \ + __get_rel_str(dst)[len] = '\0'; \ + } while (0) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_bitmask +#define __assign_rel_bitmask(dst, src, nr_bits) \ + memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + +#undef __assign_rel_sockaddr +#define __assign_rel_sockaddr(dest, src, len) \ + memcpy(__get_rel_dynamic_array(dest), src, len) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef __perf_count +#define __perf_count(c) (c) + +#undef __perf_task +#define __perf_task(t) (t) diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h new file mode 100644 index 000000000000..8a7ec24c246d --- /dev/null +++ b/include/trace/stages/stage7_class_define.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Stage 7 definitions for creating trace events */ + +#undef __entry +#define __entry REC + +#undef __print_flags +#undef __print_symbolic +#undef __print_hex +#undef __print_hex_str +#undef __get_dynamic_array +#undef __get_dynamic_array_len +#undef __get_str +#undef __get_bitmask +#undef __get_sockaddr +#undef __get_rel_dynamic_array +#undef __get_rel_dynamic_array_len +#undef __get_rel_str +#undef __get_rel_bitmask +#undef __get_rel_sockaddr +#undef __print_array +#undef __print_hex_dump + +/* + * The below is not executed in the kernel. It is only what is + * displayed in the print format for userspace to parse. + */ +#undef __print_ns_to_secs +#define __print_ns_to_secs(val) (val) / 1000000000UL + +#undef __print_ns_without_secs +#define __print_ns_without_secs(val) (val) % 1000000000UL + +#undef TP_printk +#define TP_printk(fmt, args...) 
"\"" fmt "\", " __stringify(args) diff --git a/include/trace/stages/stage7_defines.h b/include/trace/stages/stage7_defines.h deleted file mode 100644 index 8a7ec24c246d..000000000000 --- a/include/trace/stages/stage7_defines.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Stage 7 definitions for creating trace events */ - -#undef __entry -#define __entry REC - -#undef __print_flags -#undef __print_symbolic -#undef __print_hex -#undef __print_hex_str -#undef __get_dynamic_array -#undef __get_dynamic_array_len -#undef __get_str -#undef __get_bitmask -#undef __get_sockaddr -#undef __get_rel_dynamic_array -#undef __get_rel_dynamic_array_len -#undef __get_rel_str -#undef __get_rel_bitmask -#undef __get_rel_sockaddr -#undef __print_array -#undef __print_hex_dump - -/* - * The below is not executed in the kernel. It is only what is - * displayed in the print format for userspace to parse. - */ -#undef __print_ns_to_secs -#define __print_ns_to_secs(val) (val) / 1000000000UL - -#undef __print_ns_without_secs -#define __print_ns_without_secs(val) (val) % 1000000000UL - -#undef TP_printk -#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) diff --git a/include/trace/trace_custom_events.h b/include/trace/trace_custom_events.h index b567c7202339..6e492dba96bf 100644 --- a/include/trace/trace_custom_events.h +++ b/include/trace/trace_custom_events.h @@ -35,7 +35,7 @@ /* Stage 1 creates the structure of the recorded event layout */ -#include "stages/stage1_defines.h" +#include "stages/stage1_struct_define.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ @@ -56,7 +56,7 @@ /* Stage 2 creates the custom class */ -#include "stages/stage2_defines.h" +#include "stages/stage2_data_offsets.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -71,7 +71,7 @@ /* Stage 3 create the way to print the custom event */ -#include "stages/stage3_defines.h" +#include "stages/stage3_trace_output.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -102,7 +102,7 @@ static struct trace_event_functions trace_custom_event_type_funcs_##call = { \ /* Stage 4 creates the offset layout for the fields */ -#include "stages/stage4_defines.h" +#include "stages/stage4_event_fields.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \ @@ -114,7 +114,7 @@ static struct trace_event_fields trace_custom_event_fields_##call[] = { \ /* Stage 5 creates the helper function for dynamic fields */ -#include "stages/stage5_defines.h" +#include "stages/stage5_get_offsets.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -134,7 +134,7 @@ static inline notrace int trace_custom_event_get_offsets_##call( \ /* Stage 6 creates the probe function that records the event */ -#include "stages/stage6_defines.h" +#include "stages/stage6_event_callback.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -182,7 +182,7 @@ static inline void ftrace_test_custom_probe_##call(void) \ /* Stage 7 creates the actual class and event structure for the custom event */ -#include "stages/stage7_defines.h" +#include "stages/stage7_class_define.h" #undef DECLARE_CUSTOM_EVENT_CLASS #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, 
assign, print) \ diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8a8cd66cc6d5..c2f9cabf154d 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -45,7 +45,7 @@ PARAMS(print)); \ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); -#include "stages/stage1_defines.h" +#include "stages/stage1_struct_define.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ @@ -109,7 +109,7 @@ * The size of an array is also encoded, in the higher 16 bits of . */ -#include "stages/stage2_defines.h" +#include "stages/stage2_data_offsets.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -181,7 +181,7 @@ * in binary. */ -#include "stages/stage3_defines.h" +#include "stages/stage3_trace_output.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -236,7 +236,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#include "stages/stage4_defines.h" +#include "stages/stage4_event_fields.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ @@ -249,7 +249,7 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#include "stages/stage5_defines.h" +#include "stages/stage5_get_offsets.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -372,7 +372,7 @@ static inline notrace int trace_event_get_offsets_##call( \ #define _TRACE_PERF_INIT(call) #endif /* CONFIG_PERF_EVENTS */ -#include "stages/stage6_defines.h" +#include "stages/stage6_event_callback.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -418,7 +418,7 @@ static inline void ftrace_test_probe_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#include "stages/stage7_defines.h" +#include "stages/stage7_class_define.h" #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ -- cgit v1.2.3 From 768c1e7f1de03afd0b55e0e951efc272309eeb52 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 29 Mar 2022 10:30:51 -0700 Subject: tracing/user_events: Remove eBPF interfaces Remove eBPF interfaces within user_events to ensure they are fully reviewed. Link: https://lore.kernel.org/all/20220329165718.GA10381@kbox/ Link: https://lkml.kernel.org/r/20220329173051.10087-1-beaub@linux.microsoft.com Suggested-by: Alexei Starovoitov Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 53 ---------------------------------------- 1 file changed, 53 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h index e570840571e1..736e05603463 100644 --- a/include/uapi/linux/user_events.h +++ b/include/uapi/linux/user_events.h @@ -32,9 +32,6 @@ /* Create dynamic location entry within a 32-bit value */ #define DYN_LOC(offset, size) ((size) << 16 | (offset)) -/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ -#define FLAG_BPF_ITER (1 << 0) - /* * Describes an event registration and stores the results of the registration. 
* This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum @@ -63,54 +60,4 @@ struct user_reg { /* Requests to delete a user_event */ #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) -/* Data type that was passed to the BPF program */ -enum { - /* Data resides in kernel space */ - USER_BPF_DATA_KERNEL, - - /* Data resides in user space */ - USER_BPF_DATA_USER, - - /* Data is a pointer to a user_bpf_iter structure */ - USER_BPF_DATA_ITER, -}; - -/* - * Describes an iovec iterator that BPF programs can use to access data for - * a given user_event write() / writev() call. - */ -struct user_bpf_iter { - - /* Offset of the data within the first iovec */ - __u32 iov_offset; - - /* Number of iovec structures */ - __u32 nr_segs; - - /* Pointer to iovec structures */ - const struct iovec *iov; -}; - -/* Context that BPF programs receive when attached to a user_event */ -struct user_bpf_context { - - /* Data type being passed (see union below) */ - __u32 data_type; - - /* Length of the data */ - __u32 data_len; - - /* Pointer to data, varies by data type */ - union { - /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ - void *kdata; - - /* User data (data_type == USER_BPF_DATA_USER) */ - void *udata; - - /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ - struct user_bpf_iter *iter; - }; -}; - #endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From 18bfee3216fa6f28d55ebf88d824a539d2bec3c7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 30 Mar 2022 09:00:19 +0200 Subject: ftrace: Make ftrace_graph_is_dead() a static branch ftrace_graph_is_dead() is used on hot paths, it just reads a variable in memory and is not worth suffering function call constraints. For instance, at entry of prepare_ftrace_return(), inlining it avoids saving prepare_ftrace_return() parameters to stack and restoring them after calling ftrace_graph_is_dead(). While at it using a static branch is even more performant and is rather well adapted considering that the returned value will almost never change. Inline ftrace_graph_is_dead() and replace 'kill_ftrace_graph' bool by a static branch. The performance improvement is noticeable. Link: https://lkml.kernel.org/r/e0411a6a0ed3eafff0ad2bc9cd4b0e202b4617df.1648623570.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 37b619185ec9..f15a4b76cbfc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -1015,7 +1016,20 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, extern int register_ftrace_graph(struct fgraph_ops *ops); extern void unregister_ftrace_graph(struct fgraph_ops *ops); -extern bool ftrace_graph_is_dead(void); +/** + * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called + * + * ftrace_graph_stop() is called when a severe error is detected in + * the function graph tracing. This function is called by the critical + * paths of function graph to keep those paths from doing any more harm. 
+ */ +DECLARE_STATIC_KEY_FALSE(kill_ftrace_graph); + +static inline bool ftrace_graph_is_dead(void) +{ + return static_branch_unlikely(&kill_ftrace_graph); +} + extern void ftrace_graph_stop(void); /* The current handlers in use */ -- cgit v1.2.3 From 5cfff569cab8bf544bab62c911c5d6efd5af5e05 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 1 Apr 2022 14:39:03 -0400 Subject: tracing: Move user_events.h temporarily out of include/uapi While user_events API is under development and has been marked for broken to not let the API become fixed, move the header file out of the uapi directory. This is to prevent it from being installed, then later changed, and then have an old distro user space update with a new kernel, where applications see the user_events being available, but the old header is in place, and then they get compiled incorrectly. Also, surround the include with CONFIG_COMPILE_TEST to the current location, but when the BROKEN tag is taken off, it will use the uapi directory, and fail to compile. This is a good way to remind us to move the header back. Link: https://lore.kernel.org/all/20220330155835.5e1f6669@gandalf.local.home Link: https://lkml.kernel.org/r/20220330201755.29319-1-mathieu.desnoyers@efficios.com Link: https://lkml.kernel.org/r/20220401143903.188384f3@gandalf.local.home Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/user_events.h | 63 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/user_events.h | 63 ---------------------------------------- 2 files changed, 63 insertions(+), 63 deletions(-) create mode 100644 include/linux/user_events.h delete mode 100644 include/uapi/linux/user_events.h (limited to 'include') diff --git a/include/linux/user_events.h b/include/linux/user_events.h new file mode 100644 index 000000000000..736e05603463 --- /dev/null +++ b/include/linux/user_events.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. + * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. 
+ */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h deleted file mode 100644 index 736e05603463..000000000000 --- a/include/uapi/linux/user_events.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (c) 2021, Microsoft Corporation. - * - * Authors: - * Beau Belgrave - */ -#ifndef _UAPI_LINUX_USER_EVENTS_H -#define _UAPI_LINUX_USER_EVENTS_H - -#include -#include - -#ifdef __KERNEL__ -#include -#else -#include -#endif - -#define USER_EVENTS_SYSTEM "user_events" -#define USER_EVENTS_PREFIX "u:" - -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - -/* Create dynamic location entry within a 32-bit value */ -#define DYN_LOC(offset, size) ((size) << 16 | (offset)) - -/* - * Describes an event registration and stores the results of the registration. - * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum - * must set the size and name_args before invocation. - */ -struct user_reg { - - /* Input: Size of the user_reg structure being used */ - __u32 size; - - /* Input: Pointer to string with event name, description and flags */ - __u64 name_args; - - /* Output: Byte index of the event within the status page */ - __u32 status_index; - - /* Output: Index of the event to use when writing data */ - __u32 write_index; -}; - -#define DIAG_IOC_MAGIC '*' - -/* Requests to register a user_event */ -#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) - -/* Requests to delete a user_event */ -#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) - -#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From 1cd927ad6f62f27d8908498dcbf61395c5dd5fe2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 1 Apr 2022 14:39:03 -0400 Subject: tracing: mark user_events as BROKEN After being merged, user_events become more visible to a wider audience that have concerns with the current API. It is too late to fix this for this release, but instead of a full revert, just mark it as BROKEN (which prevents it from being selected in make config). Then we can work finding a better API. If that fails, then it will need to be completely reverted. To not have the code silently bitrot, still allow building it with COMPILE_TEST. And to prevent the uapi header from being installed, then later changed, and then have an old distro user space see the old version, move the header file out of the uapi directory. Surround the include with CONFIG_COMPILE_TEST to the current location, but when the BROKEN tag is taken off, it will use the uapi directory, and fail to compile. 
This is a good way to remind us to move the header back. Link: https://lore.kernel.org/all/20220330155835.5e1f6669@gandalf.local.home Link: https://lkml.kernel.org/r/20220330201755.29319-1-mathieu.desnoyers@efficios.com Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds --- include/linux/user_events.h | 116 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/user_events.h | 116 --------------------------------------- 2 files changed, 116 insertions(+), 116 deletions(-) create mode 100644 include/linux/user_events.h delete mode 100644 include/uapi/linux/user_events.h (limited to 'include') diff --git a/include/linux/user_events.h b/include/linux/user_events.h new file mode 100644 index 000000000000..e570840571e1 --- /dev/null +++ b/include/linux/user_events.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. + * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ +#define FLAG_BPF_ITER (1 << 0) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. + */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +/* Data type that was passed to the BPF program */ +enum { + /* Data resides in kernel space */ + USER_BPF_DATA_KERNEL, + + /* Data resides in user space */ + USER_BPF_DATA_USER, + + /* Data is a pointer to a user_bpf_iter structure */ + USER_BPF_DATA_ITER, +}; + +/* + * Describes an iovec iterator that BPF programs can use to access data for + * a given user_event write() / writev() call. 
+ */ +struct user_bpf_iter { + + /* Offset of the data within the first iovec */ + __u32 iov_offset; + + /* Number of iovec structures */ + __u32 nr_segs; + + /* Pointer to iovec structures */ + const struct iovec *iov; +}; + +/* Context that BPF programs receive when attached to a user_event */ +struct user_bpf_context { + + /* Data type being passed (see union below) */ + __u32 data_type; + + /* Length of the data */ + __u32 data_len; + + /* Pointer to data, varies by data type */ + union { + /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ + void *kdata; + + /* User data (data_type == USER_BPF_DATA_USER) */ + void *udata; + + /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ + struct user_bpf_iter *iter; + }; +}; + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h deleted file mode 100644 index e570840571e1..000000000000 --- a/include/uapi/linux/user_events.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (c) 2021, Microsoft Corporation. - * - * Authors: - * Beau Belgrave - */ -#ifndef _UAPI_LINUX_USER_EVENTS_H -#define _UAPI_LINUX_USER_EVENTS_H - -#include -#include - -#ifdef __KERNEL__ -#include -#else -#include -#endif - -#define USER_EVENTS_SYSTEM "user_events" -#define USER_EVENTS_PREFIX "u:" - -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - -/* Create dynamic location entry within a 32-bit value */ -#define DYN_LOC(offset, size) ((size) << 16 | (offset)) - -/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ -#define FLAG_BPF_ITER (1 << 0) - -/* - * Describes an event registration and stores the results of the registration. - * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum - * must set the size and name_args before invocation. - */ -struct user_reg { - - /* Input: Size of the user_reg structure being used */ - __u32 size; - - /* Input: Pointer to string with event name, description and flags */ - __u64 name_args; - - /* Output: Byte index of the event within the status page */ - __u32 status_index; - - /* Output: Index of the event to use when writing data */ - __u32 write_index; -}; - -#define DIAG_IOC_MAGIC '*' - -/* Requests to register a user_event */ -#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) - -/* Requests to delete a user_event */ -#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) - -/* Data type that was passed to the BPF program */ -enum { - /* Data resides in kernel space */ - USER_BPF_DATA_KERNEL, - - /* Data resides in user space */ - USER_BPF_DATA_USER, - - /* Data is a pointer to a user_bpf_iter structure */ - USER_BPF_DATA_ITER, -}; - -/* - * Describes an iovec iterator that BPF programs can use to access data for - * a given user_event write() / writev() call. 
- */ -struct user_bpf_iter { - - /* Offset of the data within the first iovec */ - __u32 iov_offset; - - /* Number of iovec structures */ - __u32 nr_segs; - - /* Pointer to iovec structures */ - const struct iovec *iov; -}; - -/* Context that BPF programs receive when attached to a user_event */ -struct user_bpf_context { - - /* Data type being passed (see union below) */ - __u32 data_type; - - /* Length of the data */ - __u32 data_len; - - /* Pointer to data, varies by data type */ - union { - /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ - void *kdata; - - /* User data (data_type == USER_BPF_DATA_USER) */ - void *udata; - - /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ - struct user_bpf_iter *iter; - }; -}; - -#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3
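For context on the user_events ioctl ABI touched by the last three patches, below is a minimal, illustrative user-space sketch of the registration flow built from the struct user_reg and DIAG_IOCSREG definitions shown above. It is an assumption-laden example, not part of any patch: the tracefs path, the event description string, and the header being reachable as <linux/user_events.h> are placeholders (while the interface is marked BROKEN the header is deliberately not installed into the uapi tree), and the ABI itself is expected to change.

/*
 * Illustrative only: register a user_event through the ioctl ABI shown
 * in the header above. The file path and event string are assumptions
 * for the example; error handling is kept to a minimum.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/user_events.h>	/* assumed reachable; not installed while BROKEN */

int main(void)
{
	struct user_reg reg;
	int fd, ret;

	/* tracefs mount point assumed at /sys/kernel/tracing */
	fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&reg, 0, sizeof(reg));
	reg.size = sizeof(reg);
	/* Event name followed by its field description, parsed by the kernel */
	reg.name_args = (__u64)(unsigned long)"test_event u32 count";

	/* On success the kernel fills in status_index and write_index */
	ret = ioctl(fd, DIAG_IOCSREG, &reg);
	if (ret < 0) {
		perror("DIAG_IOCSREG");
		close(fd);
		return 1;
	}

	printf("registered: status_index=%u write_index=%u\n",
	       reg.status_index, reg.write_index);

	close(fd);
	return 0;
}

Emitting data afterwards goes through write()/writev() on the same file descriptor, with the payload identified by the returned write_index; that path, and the status page indexed by status_index, are left out of this sketch since their layout is exactly what is still under discussion in these patches.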