From be68d63a139bd4a0eae44d06234eaff8c07d207c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jun 2024 19:19:36 -0400 Subject: ring-buffer: Add ring_buffer_alloc_range() In preparation to allowing the trace ring buffer to be allocated in a range of memory that is persistent across reboots, add ring_buffer_alloc_range(). It takes a contiguous range of memory and will split it up evenly for the per CPU ring buffers. If there's not enough memory to handle all CPUs with the minimum size, it will fail to allocate the ring buffer. Link: https://lkml.kernel.org/r/20240612232025.363998725@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Joel Fernandes Cc: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineeth Pillai Cc: Youssef Esmat Cc: Beau Belgrave Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: "Paul E. McKenney" Cc: David Howells Cc: Mike Rapoport Cc: Dave Hansen Cc: Tony Luck Cc: Guenter Roeck Cc: Ross Zwisler Cc: Kees Cook Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..a50b0223b1d3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -89,6 +89,11 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer, struct trace_buffer * __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); +struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, + int order, unsigned long start, + unsigned long range_size, + struct lock_class_key *key); + /* * Because the ring buffer is generic, if other users of the ring buffer get * traced by ftrace, it can produce lockdep warnings. We need to keep each @@ -100,6 +105,18 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc_range(size, flags, order, start, range_size) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc_range((size), (flags), (order), (start), \ + (range_size), &__key); \ +}) + typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data); -- cgit v1.2.3 From 7a1d1e4b9639ff08b2f42605c2950ae1ba4a43bf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jun 2024 19:19:44 -0400 Subject: tracing/ring-buffer: Add last_boot_info file to boot instance If an instance is mapped to memory on boot up, create a new file called "last_boot_info" that will hold information that can be used to properly parse the raw data in the ring buffer. It will export the delta of the addresses for text and data from what it was from the last boot. It does not expose actually addresses (unless you knew what the actual address was from the last boot). The output will look like: # cat last_boot_info text delta: -268435456 data delta: -268435456 The text and data are kept separate in case they are ever made different. 
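A trace parser running after the next boot can apply these deltas when resolving addresses that were recorded during the previous boot. A minimal userspace sketch, assuming the delta is defined as current-boot address minus last-boot address and has already been read from last_boot_info (the function and variable names below are illustrative, not part of the patch):

#include <stdio.h>

/* Map an address recorded in last boot's ring buffer into the current boot's
 * address space so it can be looked up in the current kernel's symbol table. */
static unsigned long map_old_text_addr(unsigned long old_addr, long text_delta)
{
        return old_addr + (unsigned long)text_delta;
}

int main(void)
{
        long text_delta = -268435456;                /* value read from last_boot_info */
        unsigned long old_ip = 0xffffffff90101230UL; /* address from last boot's trace */

        printf("current-boot address: %#lx\n", map_old_text_addr(old_ip, text_delta));
        return 0;
}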
Link: https://lkml.kernel.org/r/20240612232026.658680738@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Joel Fernandes Cc: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineeth Pillai Cc: Youssef Esmat Cc: Beau Belgrave Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: "Paul E. McKenney" Cc: David Howells Cc: Mike Rapoport Cc: Dave Hansen Cc: Tony Luck Cc: Guenter Roeck Cc: Ross Zwisler Cc: Kees Cook Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index a50b0223b1d3..55de3798a9b9 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -94,6 +94,9 @@ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flag unsigned long range_size, struct lock_class_key *key); +bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, + long *data); + /* * Because the ring buffer is generic, if other users of the ring buffer get * traced by ftrace, it can produce lockdep warnings. We need to keep each -- cgit v1.2.3 From 304b3f2bc07ba7c74a1ebc7917a8f0549e6b8d54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:16 -1000 Subject: sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork() A new BPF extensible sched_class will need more control over the forking process. It wants to be able to fail from sched_cgroup_fork() after the new task's sched_task_group is initialized so that the loaded BPF program can prepare the task with its cgroup association established and reject fork if e.g. allocation fails. Allow sched_cgroup_fork() to fail by making it return int instead of void and adding sched_cancel_fork() to undo sched_fork() in the error path. sched_cgroup_fork() doesn't fail yet and this patch shouldn't cause any behavior changes. v2: Patch description updated to detail the expected use. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/task.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d362aacf9f89..4df2f9055587 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,7 +63,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_cancel_fork(struct task_struct *p); extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); -- cgit v1.2.3 From 4f9c7ca851044273df5d67e00ca0b4d0476a48f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:16 -1000 Subject: sched: Factor out cgroup weight conversion functions Factor out sched_weight_from/to_cgroup() which convert between scheduler shares and cgroup weight. No functional change. The factored out functions will be used by a new BPF extensible sched_class so that the weights can be exposed to the BPF programs in a way which is consistent with cgroup weights and easier to interpret.
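For illustration, the conversion helpers could look roughly like the following; this is a sketch rather than the patch's code (the diff below only moves the constants), and it assumes CFS's nice-0 load weight of 1024 as the scheduler-side scale:

#include <linux/cgroup.h>
#include <linux/math.h>
#include <linux/minmax.h>

/* cgroup weight CGROUP_WEIGHT_DFL (100) maps to scheduler weight 1024 */
static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
{
        return DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL);
}

static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
{
        return clamp_t(unsigned long,
                       DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024),
                       CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
}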
The weight conversions will be used regardless of cgroup usage. It's just borrowing the cgroup weight range as it's more intuitive. CGROUP_WEIGHT_MIN/DFL/MAX constants are moved outside CONFIG_CGROUPS so that the conversion helpers can always be defined. v2: The helpers are now defined regardless of COFNIG_CGROUPS. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2150ca60394b..3cdaec701600 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -29,8 +29,6 @@ struct kernel_clone_args; -#ifdef CONFIG_CGROUPS - /* * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of @@ -40,6 +38,8 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 +#ifdef CONFIG_CGROUPS + enum { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ -- cgit v1.2.3 From a7a9fc549293168c1edbc8433d5d4fbbe1171630 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:17 -1000 Subject: sched_ext: Add boilerplate for extensible scheduler class This adds dummy implementations of sched_ext interfaces which interact with the scheduler core and hook them in the correct places. As they're all dummies, this doesn't cause any behavior changes. This is split out to help reviewing. v2: balance_scx_on_up() dropped. This will be handled in sched_ext proper. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/sched/ext.h (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h new file mode 100644 index 000000000000..a05dfcf533b0 --- /dev/null +++ b/include/linux/sched/ext.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_EXT_H +#define _LINUX_SCHED_EXT_H + +#ifdef CONFIG_SCHED_CLASS_EXT +#error "NOT IMPLEMENTED YET" +#else /* !CONFIG_SCHED_CLASS_EXT */ + +static inline void sched_ext_free(struct task_struct *p) {} + +#endif /* CONFIG_SCHED_CLASS_EXT */ +#endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From f0e1a0643a59bf1f922fa209cec86a170b784f3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:17 -1000 Subject: sched_ext: Implement BPF extensible scheduler class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a new scheduler class sched_ext (SCX), which allows scheduling policies to be implemented as BPF programs to achieve the following: 1. Ease of experimentation and exploration: Enabling rapid iteration of new scheduling policies. 2. Customization: Building application-specific schedulers which implement policies that are not applicable to general-purpose schedulers. 3. Rapid scheduler deployments: Non-disruptive swap outs of scheduling policies in production environments. sched_ext leverages BPF’s struct_ops feature to define a structure which exports function callbacks and flags to BPF programs that wish to implement scheduling policies. 
The struct_ops structure exported by sched_ext is struct sched_ext_ops, and is conceptually similar to struct sched_class. The role of sched_ext is to map the complex sched_class callbacks to the more simple and ergonomic struct sched_ext_ops callbacks. For more detailed discussion on the motivations and overview, please refer to the cover letter. Later patches will also add several example schedulers and documentation. This patch implements the minimum core framework to enable implementation of BPF schedulers. Subsequent patches will gradually add functionalities including safety guarantee mechanisms, nohz and cgroup support. include/linux/sched/ext.h defines struct sched_ext_ops. With the comment on top, each operation should be self-explanatory. The followings are worth noting: - Both "sched_ext" and its shorthand "scx" are used. If the identifier already has "sched" in it, "ext" is used; otherwise, "scx". - In sched_ext_ops, only .name is mandatory. Every operation is optional and if omitted a simple but functional default behavior is provided. - A new policy constant SCHED_EXT is added and a task can select sched_ext by invoking sched_setscheduler(2) with the new policy constant. However, if the BPF scheduler is not loaded, SCHED_EXT is the same as SCHED_NORMAL and the task is scheduled by CFS. When the BPF scheduler is loaded, all tasks which have the SCHED_EXT policy are switched to sched_ext. - To bridge the workflow imbalance between the scheduler core and sched_ext_ops callbacks, sched_ext uses simple FIFOs called dispatch queues (dsq's). By default, there is one global dsq (SCX_DSQ_GLOBAL), and one local per-CPU dsq (SCX_DSQ_LOCAL). SCX_DSQ_GLOBAL is provided for convenience and need not be used by a scheduler that doesn't require it. SCX_DSQ_LOCAL is the per-CPU FIFO that sched_ext pulls from when putting the next task on the CPU. The BPF scheduler can manage an arbitrary number of dsq's using scx_bpf_create_dsq() and scx_bpf_destroy_dsq(). - sched_ext guarantees system integrity no matter what the BPF scheduler does. To enable this, each task's ownership is tracked through p->scx.ops_state and all tasks are put on scx_tasks list. The disable path can always recover and revert all tasks back to CFS. See p->scx.ops_state and scx_tasks. - A task is not tied to its rq while enqueued. This decouples CPU selection from queueing and allows sharing a scheduling queue across an arbitrary subset of CPUs. This adds some complexities as a task may need to be bounced between rq's right before it starts executing. See dispatch_to_local_dsq() and move_task_to_local_dsq(). - One complication that arises from the above weak association between task and rq is that synchronizing with dequeue() gets complicated as dequeue() may happen anytime while the task is enqueued and the dispatch path might need to release the rq lock to transfer the task. Solving this requires a bit of complexity. See the logic around p->scx.sticky_cpu and p->scx.ops_qseq. - Both enable and disable paths are a bit complicated. The enable path switches all tasks without blocking to avoid issues which can arise from partially switched states (e.g. the switching task itself being starved). The disable path can't trust the BPF scheduler at all, so it also has to guarantee forward progress without blocking. See scx_ops_enable() and scx_ops_disable_workfn(). - When sched_ext is disabled, static_branches are used to shut down the entry points from hot paths. 
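To make the dispatch queue flow described in the notes above concrete, here is a minimal sketch of a BPF scheduler that relies on the default behavior for everything it doesn't implement. It is illustrative only and assumes the scx/common.bpf.h helper header shipped with the in-tree example schedulers:

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

/* Put every runnable task on the built-in global FIFO with the default slice.
 * Without a custom ops.dispatch(), the default behavior moves tasks from the
 * global DSQ to each CPU's local DSQ when the CPU needs something to run. */
void BPF_STRUCT_OPS(minimal_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

SEC(".struct_ops.link")
struct sched_ext_ops minimal_ops = {
        .enqueue        = (void *)minimal_enqueue,
        .name           = "minimal",
};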
v7: - scx_ops_bypass() was incorrectly and unnecessarily trying to grab scx_ops_enable_mutex which can lead to deadlocks in the disable path. Fixed. - Fixed TASK_DEAD handling bug in scx_ops_enable() path which could lead to use-after-free. - Consolidated per-cpu variable usages and other cleanups. v6: - SCX_NR_ONLINE_OPS replaced with SCX_OPI_*_BEGIN/END so that multiple groups can be expressed. Later CPU hotplug operations are put into their own group. - SCX_OPS_DISABLING state is replaced with the new bypass mechanism which allows temporarily putting the system into simple FIFO scheduling mode bypassing the BPF scheduler. In addition to the shut down path, this will also be used to isolate the BPF scheduler across PM events. Enabling and disabling the bypass mode requires iterating all runnable tasks. rq->scx.runnable_list addition is moved from the later watchdog patch. - ops.prep_enable() is replaced with ops.init_task() and ops.enable/disable() are now called whenever the task enters and leaves sched_ext instead of when the task becomes schedulable on sched_ext and stops being so. A new operation - ops.exit_task() - is called when the task stops being schedulable on sched_ext. - scx_bpf_dispatch() can now be called from ops.select_cpu() too. This removes the need for communicating local dispatch decision made by ops.select_cpu() to ops.enqueue() via per-task storage. SCX_KF_SELECT_CPU is added to support the change. - SCX_TASK_ENQ_LOCAL which told the BPF scheudler that scx_select_cpu_dfl() wants the task to be dispatched to the local DSQ was removed. Instead, scx_bpf_select_cpu_dfl() now dispatches directly if it finds a suitable idle CPU. If such behavior is not desired, users can use scx_bpf_select_cpu_dfl() which returns the verdict in a bool out param. - scx_select_cpu_dfl() was mishandling WAKE_SYNC and could end up queueing many tasks on a local DSQ which makes tasks to execute in order while other CPUs stay idle which made some hackbench numbers really bad. Fixed. - The current state of sched_ext can now be monitored through files under /sys/sched_ext instead of /sys/kernel/debug/sched/ext. This is to enable monitoring on kernels which don't enable debugfs. - sched_ext wasn't telling BPF that ops.dispatch()'s @prev argument may be NULL and a BPF scheduler which derefs the pointer without checking could crash the kernel. Tell BPF. This is currently a bit ugly. A better way to annotate this is expected in the future. - scx_exit_info updated to carry pointers to message buffers instead of embedding them directly. This decouples buffer sizes from API so that they can be changed without breaking compatibility. - exit_code added to scx_exit_info. This is used to indicate different exit conditions on non-error exits and will be used to handle e.g. CPU hotplugs. - The patch "sched_ext: Allow BPF schedulers to switch all eligible tasks into sched_ext" is folded in and the interface is changed so that partial switching is indicated with a new ops flag %SCX_OPS_SWITCH_PARTIAL. This makes scx_bpf_switch_all() unnecessasry and in turn SCX_KF_INIT. ops.init() is now called with SCX_KF_SLEEPABLE. - Code reorganized so that only the parts necessary to integrate with the rest of the kernel are in the header files. - Changes to reflect the BPF and other kernel changes including the addition of bpf_sched_ext_ops.cfi_stubs. 
v5: - To accommodate 32bit configs, p->scx.ops_state is now atomic_long_t instead of atomic64_t and scx_dsp_buf_ent.qseq which uses load_acquire/store_release is now unsigned long instead of u64. - Fix the bug where bpf_scx_btf_struct_access() was allowing write access to arbitrary fields. - Distinguish kfuncs which can be called from any sched_ext ops and from anywhere. e.g. scx_bpf_pick_idle_cpu() can now be called only from sched_ext ops. - Rename "type" to "kind" in scx_exit_info to make it easier to use on languages in which "type" is a reserved keyword. - Since cff9b2332ab7 ("kernel/sched: Modify initial boot task idle setup"), PF_IDLE is not set on idle tasks which haven't been online yet which made scx_task_iter_next_filtered() include those idle tasks in iterations leading to oopses. Update scx_task_iter_next_filtered() to directly test p->sched_class against idle_sched_class instead of using is_idle_task() which tests PF_IDLE. - Other updates to match upstream changes such as adding const to set_cpumask() param and renaming check_preempt_curr() to wakeup_preempt(). v4: - SCHED_CHANGE_BLOCK replaced with the previous sched_deq_and_put_task()/sched_enq_and_set_tsak() pair. This is because upstream is adaopting a different generic cleanup mechanism. Once that lands, the code will be adapted accordingly. - task_on_scx() used to test whether a task should be switched into SCX, which is confusing. Renamed to task_should_scx(). task_on_scx() now tests whether a task is currently on SCX. - scx_has_idle_cpus is barely used anymore and replaced with direct check on the idle cpumask. - SCX_PICK_IDLE_CORE added and scx_pick_idle_cpu() improved to prefer fully idle cores. - ops.enable() now sees up-to-date p->scx.weight value. - ttwu_queue path is disabled for tasks on SCX to avoid confusing BPF schedulers expecting ->select_cpu() call. - Use cpu_smt_mask() instead of topology_sibling_cpumask() like the rest of the scheduler. v3: - ops.set_weight() added to allow BPF schedulers to track weight changes without polling p->scx.weight. - move_task_to_local_dsq() was losing SCX-specific enq_flags when enqueueing the task on the target dsq because it goes through activate_task() which loses the upper 32bit of the flags. Carry the flags through rq->scx.extra_enq_flags. - scx_bpf_dispatch(), scx_bpf_pick_idle_cpu(), scx_bpf_task_running() and scx_bpf_task_cpu() now use the new KF_RCU instead of KF_TRUSTED_ARGS to make it easier for BPF schedulers to call them. - The kfunc helper access control mechanism implemented through sched_ext_entity.kf_mask is improved. Now SCX_CALL_OP*() is always used when invoking scx_ops operations. v2: - balance_scx_on_up() is dropped. Instead, on UP, balance_scx() is called from put_prev_taks_scx() and pick_next_task_scx() as necessary. To determine whether balance_scx() should be called from put_prev_task_scx(), SCX_TASK_DEQD_FOR_SLEEP flag is added. See the comment in put_prev_task_scx() for details. - sched_deq_and_put_task() / sched_enq_and_set_task() sequences replaced with SCHED_CHANGE_BLOCK(). - Unused all_dsqs list removed. This was a left-over from previous iterations. - p->scx.kf_mask is added to track and enforce which kfunc helpers are allowed. Also, init/exit sequences are updated to make some kfuncs always safe to call regardless of the current BPF scheduler state. Combined, this should make all the kfuncs safe. - BPF now supports sleepable struct_ops operations. Hacky workaround removed and operations and kfunc helpers are tagged appropriately. 
- BPF now supports bitmask / cpumask helpers. scx_bpf_get_idle_cpumask() and friends are added so that BPF schedulers can use the idle masks with the generic helpers. This replaces the hacky kfunc helpers added by a separate patch in V1. - CONFIG_SCHED_CLASS_EXT can no longer be enabled if SCHED_CORE is enabled. This restriction will be removed by a later patch which adds core-sched support. - Add MAINTAINERS entries and other misc changes. Signed-off-by: Tejun Heo Co-authored-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Andrea Righi --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/sched.h | 5 ++ include/linux/sched/ext.h | 141 +++++++++++++++++++++++++++++++++++++- include/uapi/linux/sched.h | 1 + 4 files changed, 147 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..2e712183ba09 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -133,6 +133,7 @@ *(__dl_sched_class) \ *(__rt_sched_class) \ *(__fair_sched_class) \ + *(__ext_sched_class) \ *(__idle_sched_class) \ __sched_class_lowest = .; diff --git a/include/linux/sched.h b/include/linux/sched.h index 90691d99027e..06beb8a6e0ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -80,6 +80,8 @@ struct task_group; struct task_struct; struct user_event_mm; +#include + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). @@ -802,6 +804,9 @@ struct task_struct { struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; +#ifdef CONFIG_SCHED_CLASS_EXT + struct sched_ext_entity scx; +#endif const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index a05dfcf533b0..c1530a7992cc 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -1,9 +1,148 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2022 Tejun Heo + * Copyright (c) 2022 David Vernet + */ #ifndef _LINUX_SCHED_EXT_H #define _LINUX_SCHED_EXT_H #ifdef CONFIG_SCHED_CLASS_EXT -#error "NOT IMPLEMENTED YET" + +#include +#include + +enum scx_public_consts { + SCX_OPS_NAME_LEN = 128, + + SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ +}; + +/* + * DSQ (dispatch queue) IDs are 64bit of the format: + * + * Bits: [63] [62 .. 0] + * [ B] [ ID ] + * + * B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs + * ID: 63 bit ID + * + * Built-in IDs: + * + * Bits: [63] [62] [61..32] [31 .. 0] + * [ 1] [ L] [ R ] [ V ] + * + * 1: 1 for built-in DSQs. + * L: 1 for LOCAL_ON DSQ IDs, 0 for others + * V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value. + */ +enum scx_dsq_id_flags { + SCX_DSQ_FLAG_BUILTIN = 1LLU << 63, + SCX_DSQ_FLAG_LOCAL_ON = 1LLU << 62, + + SCX_DSQ_INVALID = SCX_DSQ_FLAG_BUILTIN | 0, + SCX_DSQ_GLOBAL = SCX_DSQ_FLAG_BUILTIN | 1, + SCX_DSQ_LOCAL = SCX_DSQ_FLAG_BUILTIN | 2, + SCX_DSQ_LOCAL_ON = SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON, + SCX_DSQ_LOCAL_CPU_MASK = 0xffffffffLLU, +}; + +/* + * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the + * scheduler core and the BPF scheduler. See the documentation for more details. 
+ */ +struct scx_dispatch_q { + raw_spinlock_t lock; + struct list_head list; /* tasks in dispatch order */ + u32 nr; + u64 id; + struct rhash_head hash_node; + struct llist_node free_node; + struct rcu_head rcu; +}; + +/* scx_entity.flags */ +enum scx_ent_flags { + SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ + SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */ + SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ + SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ + + SCX_TASK_STATE_SHIFT = 8, /* bit 8 and 9 are used to carry scx_task_state */ + SCX_TASK_STATE_BITS = 2, + SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, + + SCX_TASK_CURSOR = 1 << 31, /* iteration cursor, not a task */ +}; + +/* scx_entity.flags & SCX_TASK_STATE_MASK */ +enum scx_task_state { + SCX_TASK_NONE, /* ops.init_task() not called yet */ + SCX_TASK_INIT, /* ops.init_task() succeeded, but task can be cancelled */ + SCX_TASK_READY, /* fully initialized, but not in sched_ext */ + SCX_TASK_ENABLED, /* fully initialized and in sched_ext */ + + SCX_TASK_NR_STATES, +}; + +/* + * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from + * everywhere and the following bits track which kfunc sets are currently + * allowed for %current. This simple per-task tracking works because SCX ops + * nest in a limited way. BPF will likely implement a way to allow and disallow + * kfuncs depending on the calling context which will replace this manual + * mechanism. See scx_kf_allow(). + */ +enum scx_kf_mask { + SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ + /* all non-sleepables may be nested inside SLEEPABLE */ + SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + /* ops.dequeue (in REST) may be nested inside DISPATCH */ + SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ + SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 5, /* other rq-locked operations */ + + __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | + SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, +}; + +/* + * The following is embedded in task_struct and contains all fields necessary + * for a task to be scheduled by SCX. + */ +struct sched_ext_entity { + struct scx_dispatch_q *dsq; + struct list_head dsq_node; + u32 flags; /* protected by rq lock */ + u32 weight; + s32 sticky_cpu; + s32 holding_cpu; + u32 kf_mask; /* see scx_kf_mask above */ + atomic_long_t ops_state; + + struct list_head runnable_node; /* rq->scx.runnable_list */ + + u64 ddsp_dsq_id; + u64 ddsp_enq_flags; + + /* BPF scheduler modifiable fields */ + + /* + * Runtime budget in nsecs. This is usually set through + * scx_bpf_dispatch() but can also be modified directly by the BPF + * scheduler. Automatically decreased by SCX as the task executes. On + * depletion, a scheduling event is triggered. 
+ */ + u64 slice; + + /* cold fields */ + /* must be the last field, see init_scx_entity() */ + struct list_head tasks_node; +}; + +void sched_ext_free(struct task_struct *p); + #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 3bac0a8ceab2..359a14cc76a4 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -118,6 +118,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_EXT 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 -- cgit v1.2.3 From 8a010b81b3a50b033fc3cddc613517abda586cbe Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Implement runnable task stall watchdog The most common and critical way that a BPF scheduler can misbehave is by failing to run runnable tasks for too long. This patch implements a watchdog. * All tasks record when they become runnable. * A watchdog work periodically scans all runnable tasks. If any task has stayed runnable for too long, the BPF scheduler is aborted. * scheduler_tick() monitors whether the watchdog itself is stuck. If so, the BPF scheduler is aborted. Because the watchdog only scans the tasks which are currently runnable and usually very infrequently, the overhead should be negligible. scx_qmap is updated so that it can be told to stall user and/or kernel tasks. A detected task stall looks like the following: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (dbus-daemon[953] failed to run for 6.478s) scx_check_timeout_workfn+0x10e/0x1b0 process_one_work+0x287/0x560 worker_thread+0x234/0x420 kthread+0xe9/0x100 ret_from_fork+0x1f/0x30 A detected watchdog stall: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (watchdog failed to check in for 5.001s) scheduler_tick+0x2eb/0x340 update_process_times+0x7a/0x90 tick_sched_timer+0xd8/0x130 __hrtimer_run_queues+0x178/0x3b0 hrtimer_interrupt+0xfc/0x390 __sysvec_apic_timer_interrupt+0xb7/0x2b0 sysvec_apic_timer_interrupt+0x90/0xb0 asm_sysvec_apic_timer_interrupt+0x1b/0x20 default_idle+0x14/0x20 arch_cpu_idle+0xf/0x20 default_idle_call+0x50/0x90 do_idle+0xe8/0x240 cpu_startup_entry+0x1d/0x20 kernel_init+0x0/0x190 start_kernel+0x0/0x392 start_kernel+0x324/0x392 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x104/0x109 secondary_startup_64_no_verify+0xce/0xdb Note that this patch exposes scx_ops_error[_type]() in kernel/sched/ext.h to inline scx_notify_sched_tick(). v4: - While disabling, cancel_delayed_work_sync(&scx_watchdog_work) was being called before forward progress was guaranteed and thus could lead to system lockup. Relocated. - While enabling, it was comparing msecs against jiffies without conversion leading to spurious load failures on lower HZ kernels. Fixed. - runnable list management is now used by core bypass logic and moved to the patch implementing sched_ext core. v3: - bpf_scx_init_member() was incorrectly comparing ops->timeout_ms against SCX_WATCHDOG_MAX_TIMEOUT which is in jiffies without conversion leading to spurious load failures in lower HZ kernels. Fixed. v2: - Julia Lawall noticed that the watchdog code was mixing msecs and jiffies. Fix by using jiffies for everything. 
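The core of the check is a jiffies comparison against the recorded runnable timestamp. A hedged sketch of the idea, not the kernel's exact code:

#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/sched/ext.h>

/* A task is considered stalled if it has been runnable (p->scx.runnable_at)
 * for longer than the configured timeout without getting to run. */
static bool scx_task_stalled(struct task_struct *p, unsigned long timeout_jiffies)
{
        return time_after(jiffies, p->scx.runnable_at + timeout_jiffies);
}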
Signed-off-by: David Vernet Reviewed-by: Tejun Heo Signed-off-by: Tejun Heo Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Julia Lawall --- include/linux/sched/ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index c1530a7992cc..96031252436f 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -122,6 +122,7 @@ struct sched_ext_entity { atomic_long_t ops_state; struct list_head runnable_node; /* rq->scx.runnable_list */ + unsigned long runnable_at; u64 ddsp_dsq_id; u64 ddsp_enq_flags; -- cgit v1.2.3 From 7bb6f0810ecfb73a9d7a2ca56fb001e0201a6758 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT BPF schedulers might not want to schedule certain tasks - e.g. kernel threads. This patch adds p->scx.disallow which can be set by BPF schedulers in such cases. The field can be changed anytime and setting it in ops.prep_enable() guarantees that the task can never be scheduled by sched_ext. scx_qmap is updated with the -d option to disallow a specific PID: # echo $$ 1092 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 0 ext.enabled : 0 # ./set-scx 1092 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 7 ext.enabled : 0 Run "scx_qmap -p -d 1092" in another terminal. # cat /sys/kernel/sched_ext/nr_rejected 1 # grep -E '(policy)|(ext\.enabled)' /proc/self/sched policy : 0 ext.enabled : 0 # ./set-scx 1092 setparam failed for 1092 (Permission denied) - v4: Refreshed on top of tip:sched/core. - v3: Update description to reflect /sys/kernel/sched_ext interface change. - v2: Use atomic_long_t instead of atomic64_t for scx_kick_cpus_pnt_seqs to accommodate 32bit archs. Signed-off-by: Tejun Heo Suggested-by: Barret Rhoden Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 96031252436f..ea7c501ac819 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -137,6 +137,18 @@ struct sched_ext_entity { */ u64 slice; + /* + * If set, reject future sched_setscheduler(2) calls updating the policy + * to %SCHED_EXT with -%EACCES. + * + * If set from ops.init_task() and the task's policy is already + * %SCHED_EXT, which can happen while the BPF scheduler is being loaded + * or by inhering the parent's policy during fork, the task's policy is + * rejected and forcefully reverted to %SCHED_NORMAL. The number of + * such events are reported through /sys/kernel/debug/sched_ext::nr_rejected. + */ + bool disallow; /* reject switching into SCX */ + /* cold fields */ /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; -- cgit v1.2.3 From 1538e33995eaf3f315cbb5506019b9f913ed8555 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Print sched_ext info when dumping stack It would be useful to see what the sched_ext scheduler state is, and what scheduler is running, when we're dumping a task's stack. This patch therefore adds a new print_scx_info() function that's called in the same context as print_worker_info() and print_stop_info(). An example dump follows. 
BUG: kernel NULL pointer dereference, address: 0000000000000999 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP CPU: 13 PID: 2047 Comm: insmod Tainted: G O 6.6.0-work-10323-gb58d4cae8e99-dirty #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 2/2/2022 Sched_ext: qmap (enabled+all), task: runnable_at=-17ms RIP: 0010:init_module+0x9/0x1000 [test_module] ... v3: - scx_ops_enable_state_str[] definition moved to an earlier patch as it's now used by core implementation. - Convert jiffy delta to msecs using jiffies_to_msecs() instead of multiplying by (HZ / MSEC_PER_SEC). The conversion is implemented in jiffies_delta_msecs(). v2: - We are now using scx_ops_enable_state_str[] outside CONFIG_SCHED_DEBUG. Move it outside of CONFIG_SCHED_DEBUG and to the top. This was reported by Changwoo and Andrea. Signed-off-by: David Vernet Reported-by: Changwoo Min Reported-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index ea7c501ac819..85fb5dc725ef 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -155,10 +155,12 @@ struct sched_ext_entity { }; void sched_ext_free(struct task_struct *p); +void print_scx_info(const char *log_lvl, struct task_struct *p); #else /* !CONFIG_SCHED_CLASS_EXT */ static inline void sched_ext_free(struct task_struct *p) {} +static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} #endif /* CONFIG_SCHED_CLASS_EXT */ #endif /* _LINUX_SCHED_EXT_H */ -- cgit v1.2.3 From 07814a9439a3b03d79a1001614b5bc1cab69bcec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Print debug dump after an error exit If a BPF scheduler triggers an error, the scheduler is aborted and the system is reverted to the built-in scheduler. In the process, a lot of information which may be useful for figuring out what happened can be lost. This patch adds debug dump which captures information which may be useful for debugging including runqueue and runnable thread states at the time of failure. 
The following shows a debug dump after triggering the watchdog: root@test ~# os/work/tools/sched_ext/build/bin/scx_qmap -t 100 stats : enq=1 dsp=0 delta=1 deq=0 stats : enq=90 dsp=90 delta=0 deq=0 stats : enq=156 dsp=156 delta=0 deq=0 stats : enq=218 dsp=218 delta=0 deq=0 stats : enq=255 dsp=255 delta=0 deq=0 stats : enq=271 dsp=271 delta=0 deq=0 stats : enq=284 dsp=284 delta=0 deq=0 stats : enq=293 dsp=293 delta=0 deq=0 DEBUG DUMP ================================================================================ kworker/u32:12[320] triggered exit kind 1026: runnable task stall (stress[1530] failed to run for 6.841s) Backtrace: scx_watchdog_workfn+0x136/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 QMAP FIFO[0]: QMAP FIFO[1]: QMAP FIFO[2]: 1436 QMAP FIFO[3]: QMAP FIFO[4]: CPU states ---------- CPU 0 : nr_run=1 ops_qseq=244 curr=swapper/0[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R stress[1530] -6841ms scx_state/flags=3/0x1 ops_state/qseq=2/20 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 asm_sysvec_apic_timer_interrupt+0x16/0x20 CPU 2 : nr_run=2 ops_qseq=142 curr=swapper/2[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R sshd[1703] -5905ms scx_state/flags=3/0x9 ops_state/qseq=2/88 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 __x64_sys_ppoll+0xf6/0x120 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e R fish[1539] -4141ms scx_state/flags=3/0x9 ops_state/qseq=2/124 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 futex_wait+0x60/0xe0 do_futex+0x109/0x180 __x64_sys_futex+0x117/0x190 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e CPU 3 : nr_run=2 ops_qseq=162 curr=kworker/u32:12[320] class=ext_sched_class QMAP: dsp_idx=1 dsp_cnt=0 *R kworker/u32:12[320] +0ms scx_state/flags=3/0xd ops_state/qseq=0/0 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 scx_dump_state+0x613/0x6f0 scx_ops_error_irq_workfn+0x1f/0x40 irq_work_run_list+0x82/0xd0 irq_work_run+0x14/0x30 __sysvec_irq_work+0x40/0x140 sysvec_irq_work+0x60/0x70 asm_sysvec_irq_work+0x16/0x20 scx_watchdog_workfn+0x15f/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 R kworker/3:2[1436] +0ms scx_state/flags=3/0x9 ops_state/qseq=2/160 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=08 QMAP: force_local=0 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 CPU 7 : nr_run=0 ops_qseq=76 curr=swapper/7[0] class=idle_sched_class ================================================================================ EXIT: runnable task stall (stress[1530] failed to run for 6.841s) It shows that CPU 3 was running the watchdog when it triggered the error condition and the scx_qmap thread has been queued on CPU 0 for over 5 seconds but failed to run. It also prints out scx_qmap specific information - e.g. which tasks are queued on each FIFO and so on using the dump_*() ops. This dump has proved pretty useful for developing and debugging BPF schedulers. Debug dump is generated automatically when the BPF scheduler exits due to an error. The debug buffer used in such cases is determined by sched_ext_ops.exit_dump_len and defaults to 32k. If the debug dump overruns the available buffer, the output is truncated and marked accordingly. Debug dump output can also be read through the sched_ext_dump tracepoint. 
When read through the tracepoint, there is no length limit. SysRq-D can be used to trigger debug dump at any time while a BPF scheduler is loaded. This is non-destructive - the scheduler keeps running afterwards. The output can be read through the sched_ext_dump tracepoint. v2: - The size of exit debug dump buffer can now be customized using sched_ext_ops.exit_dump_len. - sched_ext_ops.dump*() added to enable dumping of BPF scheduler specific information. - Tracpoint output and SysRq-D triggering added. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/trace/events/sched_ext.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/trace/events/sched_ext.h (limited to 'include') diff --git a/include/trace/events/sched_ext.h b/include/trace/events/sched_ext.h new file mode 100644 index 000000000000..fe19da7315a9 --- /dev/null +++ b/include/trace/events/sched_ext.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched_ext + +#if !defined(_TRACE_SCHED_EXT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_EXT_H + +#include + +TRACE_EVENT(sched_ext_dump, + + TP_PROTO(const char *line), + + TP_ARGS(line), + + TP_STRUCT__entry( + __string(line, line) + ), + + TP_fast_assign( + __assign_str(line); + ), + + TP_printk("%s", + __get_str(line) + ) +); + +#endif /* _TRACE_SCHED_EXT_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 81aae789181b5850d77dfdf74d4b85c63f0705e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Implement scx_bpf_kick_cpu() and task preemption support It's often useful to wake up and/or trigger reschedule on other CPUs. This patch adds scx_bpf_kick_cpu() kfunc helper that BPF scheduler can call to kick the target CPU into the scheduling path. As a sched_ext task relinquishes its CPU only after its slice is depleted, this patch also adds SCX_KICK_PREEMPT and SCX_ENQ_PREEMPT which clears the slice of the target CPU's current task to guarantee that sched_ext's scheduling path runs on the CPU. If SCX_KICK_IDLE is specified, the target CPU is kicked iff the CPU is idle to guarantee that the target CPU will go through at least one full sched_ext scheduling cycle after the kicking. This can be used to wake up idle CPUs without incurring unnecessary overhead if it isn't currently idle. As a demonstration of how backward compatibility can be supported using BPF CO-RE, tools/sched_ext/include/scx/compat.bpf.h is added. It provides __COMPAT_scx_bpf_kick_cpu_IDLE() which uses SCX_KICK_IDLE if available or becomes a regular kicking otherwise. This allows schedulers to use the new SCX_KICK_IDLE while maintaining support for older kernels. The plan is to temporarily use compat helpers to ease API updates and drop them after a few kernel releases. v5: - SCX_KICK_IDLE added. Note that this also adds a compat mechanism for schedulers so that they can support kernels without SCX_KICK_IDLE. This is useful as a demonstration of how new feature flags can be added in a backward compatible way. - kick_cpus_irq_workfn() reimplemented so that it touches the pending cpumasks only as necessary to reduce kicking overhead on machines with a lot of CPUs. - tools/sched_ext/include/scx/compat.bpf.h added. v4: - Move example scheduler to its own patch. v3: - Make scx_example_central switch all tasks by default. - Convert to BPF inline iterators. 
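As a sketch of the CO-RE pattern used by the compat header (hedged; the in-tree macro body may differ), a scheduler can probe for the new flag and fall back to a plain kick on kernels that don't know it:

#include <scx/common.bpf.h>
#include <bpf/bpf_core_read.h>

/* Kick @cpu only if it is idle when the running kernel knows SCX_KICK_IDLE,
 * otherwise fall back to an unconditional kick. */
static inline void compat_kick_cpu_idle(s32 cpu)
{
        if (bpf_core_enum_value_exists(enum scx_kick_flags, SCX_KICK_IDLE))
                scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
        else
                scx_bpf_kick_cpu(cpu, 0);
}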
v2: - Julia Lawall reported that scx_example_central can overflow the dispatch buffer and malfunction. As scheduling for other CPUs can't be handled by the automatic retry mechanism, fix by implementing an explicit overflow and retry handling. - Updated to use generic BPF cpumask helpers. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 85fb5dc725ef..3b2809b980ac 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -134,6 +134,10 @@ struct sched_ext_entity { * scx_bpf_dispatch() but can also be modified directly by the BPF * scheduler. Automatically decreased by SCX as the task executes. On * depletion, a scheduling event is triggered. + * + * This value is cleared to zero if the task is preempted by + * %SCX_KICK_PREEMPT and shouldn't be used to determine how long the + * task ran. Use p->se.sum_exec_runtime instead. */ u64 slice; -- cgit v1.2.3 From 22a920209ab6aa4f8ec960ed81041643fddeaec6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Implement tickless support Allow BPF schedulers to indicate tickless operation by setting p->scx.slice to SCX_SLICE_INF. A CPU whose current task has an infinite slice goes into tickless operation. scx_central is updated to use tickless operations for all tasks and instead use a BPF timer to expire slices. This also uses the SCX_ENQ_PREEMPT and task state tracking added by the previous patches. Currently, there is no way to pin the timer on the central CPU, so it may end up on one of the worker CPUs; however, outside of that, the worker CPUs can go tickless both while running sched_ext tasks and idling. With schbench running, scx_central shows: root@test ~# grep ^LOC /proc/interrupts; sleep 10; grep ^LOC /proc/interrupts LOC: 142024 656 664 449 Local timer interrupts LOC: 161663 663 665 449 Local timer interrupts Without it: root@test ~ [SIGINT]# grep ^LOC /proc/interrupts; sleep 10; grep ^LOC /proc/interrupts LOC: 188778 3142 3793 3993 Local timer interrupts LOC: 198993 5314 6323 6438 Local timer interrupts While scx_central itself is too barebones to be useful as a production scheduler, a more featureful central scheduler can be built using the same approach. Google's experience shows that such an approach can have significant benefits for certain applications such as VM hosting. v4: Allow operation even if BPF_F_TIMER_CPU_PIN is not available. v3: Pin the central scheduler's timer on the central_cpu using BPF_F_TIMER_CPU_PIN. v2: Convert to BPF inline iterators.
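In a BPF scheduler, the tickless mode boils down to dispatching with an infinite slice; a hedged sketch along the lines of what scx_central does (illustrative only, assuming scx/common.bpf.h):

#include <scx/common.bpf.h>

/* Dispatch with an infinite slice so the CPU can stop its tick; a central BPF
 * timer is then expected to end slices by kicking CPUs with scx_bpf_kick_cpu()
 * from the previous patch. */
void BPF_STRUCT_OPS(central_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_INF, enq_flags);
}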
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 3b2809b980ac..6f1a4977e9f8 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -16,6 +16,7 @@ enum scx_public_consts { SCX_OPS_NAME_LEN = 128, SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ + SCX_SLICE_INF = U64_MAX, /* infinite, implies nohz */ }; /* -- cgit v1.2.3 From 36454023f50b2aadd25b7f47c50b5edc0c59e1c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:19 -1000 Subject: sched_ext: Track tasks that are subjects of the in-flight SCX operation When some SCX operations are in flight, it is known that the subject task's rq lock is held throughout which makes it safe to access certain fields of the task - e.g. its current task_group. We want to add SCX kfunc helpers that can make use of this guarantee - e.g. to help determine the currently associated CPU cgroup from the task's current task_group. As it'd be dangerous to call such a helper on a task which isn't rq lock protected, the helper should be able to verify the input task and reject accordingly. This patch adds sched_ext_entity.kf_tasks[] which tracks the tasks which are currently being operated on by a terminal SCX operation. The new SCX_CALL_OP_[2]TASK[_RET]() can be used when invoking SCX operations which take tasks as arguments and the scx_kf_allowed_on_arg_tasks() can be used by kfunc helpers to verify the input task status. Note that as sched_ext_entity.kf_tasks[] can't handle nesting, the tracking is currently only limited to terminal SCX operations. If needed in the future, this restriction can be removed by moving the tracking to the task side with a couple per-task counters. v2: Updated to reflect the addition of SCX_KF_SELECT_CPU. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 6f1a4977e9f8..74341dbc6a19 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -106,6 +106,7 @@ enum scx_kf_mask { __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, + __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; /* @@ -120,6 +121,7 @@ struct sched_ext_entity { s32 sticky_cpu; s32 holding_cpu; u32 kf_mask; /* see scx_kf_mask above */ + struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; struct list_head runnable_node; /* rq->scx.runnable_list */ -- cgit v1.2.3 From 245254f7081dbe1c8da54675d0e4ddbe74cee61b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 18 Jun 2024 10:09:20 -1000 Subject: sched_ext: Implement sched_ext_ops.cpu_acquire/release() Scheduler classes are strictly ordered and when a higher priority class has tasks to run, the lower priority ones lose access to the CPU. Being able to monitor and act on these events is necessary for use cases including strict core-scheduling and latency management. This patch adds two operations ops.cpu_acquire() and .cpu_release(). The former is invoked when a CPU becomes available to the BPF scheduler and the opposite for the latter.
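A hedged sketch of how a BPF scheduler might wire up the two callbacks, using the re-enqueue helper this patch also adds (introduced next); illustrative only, assuming scx/common.bpf.h:

#include <scx/common.bpf.h>

/* A higher-priority sched class took the CPU: push the tasks still sitting on
 * its local DSQ back through ops.enqueue() so they can go to other CPUs. */
void BPF_STRUCT_OPS(sketch_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
{
        scx_bpf_reenqueue_local();
}

/* The CPU is available to the BPF scheduler again. */
void BPF_STRUCT_OPS(sketch_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args)
{
}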
This patch also implements scx_bpf_reenqueue_local() which can be called from .cpu_release() to trigger requeueing of all tasks in the local dsq of the CPU so that the tasks can be reassigned to other available CPUs. scx_pair is updated to use .cpu_acquire/release() along with %SCX_KICK_WAIT to make the pair scheduling guarantee strict even when a CPU is preempted by a higher priority scheduler class. scx_qmap is updated to use .cpu_acquire/release() to empty the local dsq of a preempted CPU. A similar approach can be adopted by BPF schedulers that want to have a tight control over latency. v4: Use the new SCX_KICK_IDLE to wake up a CPU after re-enqueueing. v3: Drop the const qualifier from scx_cpu_release_args.task. BPF enforces access control through the verifier, so the qualifier isn't actually operative and only gets in the way when interacting with various helpers. v2: Add p->scx.kf_mask annotation to allow calling scx_bpf_reenqueue_local() from ops.cpu_release() nested inside ops.init() and other sleepable operations. Signed-off-by: David Vernet Reviewed-by: Tejun Heo Signed-off-by: Tejun Heo Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden --- include/linux/sched/ext.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 74341dbc6a19..21c627337e01 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -98,13 +98,15 @@ enum scx_kf_mask { SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ /* all non-sleepables may be nested inside SLEEPABLE */ SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ + SCX_KF_CPU_RELEASE = 1 << 1, /* ops.cpu_release() */ /* ops.dequeue (in REST) may be nested inside DISPATCH */ SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ SCX_KF_REST = 1 << 5, /* other rq-locked operations */ - __SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH | + __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; -- cgit v1.2.3 From 7b0888b7cc1924b74ce660e02f6211df8dd46e7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:20 -1000 Subject: sched_ext: Implement core-sched support The core-sched support is composed of the following parts: - task_struct->scx.core_sched_at is added. This is a timestamp which can be used to order tasks. Depending on whether the BPF scheduler implements custom ordering, it tracks either global FIFO ordering of all tasks or local-DSQ ordering within the dispatched tasks on a CPU. - prio_less() is updated to call scx_prio_less() when comparing SCX tasks. scx_prio_less() calls ops.core_sched_before() if available or uses the core_sched_at timestamp. For global FIFO ordering, the BPF scheduler doesn't need to do anything. Otherwise, it should implement ops.core_sched_before() which reflects the ordering. - When core-sched is enabled, balance_scx() balances all SMT siblings so that they all have tasks dispatched if necessary before pick_task_scx() is called. pick_task_scx() picks between the current task and the first dispatched task on the local DSQ based on availability and the core_sched_at timestamps. 
Note that FIFO ordering is expected among the already dispatched tasks whether running or on the local DSQ, so this path always compares core_sched_at instead of calling into ops.core_sched_before(). qmap_core_sched_before() is added to scx_qmap. It scales the distances from the heads of the queues to compare the tasks across different priority queues and seems to behave as expected. v3: Fixed build error when !CONFIG_SCHED_SMT reported by Andrea Righi. v2: Sched core added the const qualifiers to prio_less task arguments. Explicitly drop them for ops.core_sched_before() task arguments. BPF enforces access control through the verifier, so the qualifier isn't actually operative and only gets in the way when interacting with various helpers. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Reviewed-by: Josh Don Cc: Andrea Righi --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 21c627337e01..3db7b32b2d1d 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -129,6 +129,9 @@ struct sched_ext_entity { struct list_head runnable_node; /* rq->scx.runnable_list */ unsigned long runnable_at; +#ifdef CONFIG_SCHED_CORE + u64 core_sched_at; /* see scx_prio_less() */ +#endif u64 ddsp_dsq_id; u64 ddsp_enq_flags; -- cgit v1.2.3 From 06e51be3d5e7a07aea5c9012773df8d5de01db6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:21 -1000 Subject: sched_ext: Add vtime-ordered priority queue to dispatch_q's Currently, a dsq is always a FIFO. A task which is dispatched earlier gets consumed or executed earlier. While this is sufficient when dsq's are used for simple staging areas for tasks which are ready to execute, it'd make dsq's a lot more useful if they can implement custom ordering. This patch adds a vtime-ordered priority queue to dsq's. When the BPF scheduler dispatches a task with the new scx_bpf_dispatch_vtime() helper, it can specify the vtime that the task should be inserted at and the task is inserted into the priority queue in the dsq which is ordered according to time_before64() comparison of the vtime values. A DSQ can either be a FIFO or priority queue and automatically switches between the two depending on whether scx_bpf_dispatch() or scx_bpf_dispatch_vtime() is used. Using the wrong variant while the DSQ already has the other type queued is not allowed and triggers an ops error. Built-in DSQs must always be FIFOs. This makes it very easy and efficient for BPF schedulers to implement proper vtime based scheduling within each dsq, at a negligible cost in terms of code complexity and overhead. scx_simple and scx_example_flatcg are updated to default to weighted vtime scheduling (the latter within each cgroup). FIFO scheduling can be selected with the -f option. v4: - As mixing priority queue and FIFO on the same DSQ sometimes led to unexpected starvations, DSQs now error out if both modes are used at the same time and the built-in DSQs are no longer allowed to be priority queues. - Explicit type struct scx_dsq_node added to contain fields needed to be linked on DSQs. This will be used to implement stateful iterator. - Tasks are now always linked on dsq->list whether the DSQ is in FIFO or PRIQ mode. This confines PRIQ related complexities to the enqueue and dequeue paths. Other paths only need to look at dsq->list. This will also ease implementing BPF iterator. - Print p->scx.dsq_flags in debug dump.
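A hedged sketch of the weighted-vtime usage described above, in the spirit of what scx_simple defaults to (SHARED_DSQ and the nice-0 weight of 100 are illustrative assumptions, not from the patch):

#include <scx/common.bpf.h>

#define SHARED_DSQ 0    /* user-created DSQ, e.g. via scx_bpf_create_dsq() in ops.init() */

/* Insert into the shared DSQ ordered by the task's vtime. */
void BPF_STRUCT_OPS(vtime_enqueue, struct task_struct *p, u64 enq_flags)
{
        scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, p->scx.dsq_vtime,
                               enq_flags);
}

/* Advance the task's vtime by the consumed slice, scaled inversely by weight
 * (nice-0 weight is 100). */
void BPF_STRUCT_OPS(vtime_stopping, struct task_struct *p, bool runnable)
{
        p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
}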
v3: - SCX_TASK_DSQ_ON_PRIQ flag is moved from p->scx.flags into its own p->scx.dsq_flags. The flag is protected with the dsq lock unlike other flags in p->scx.flags. This led to flag corruption in some cases. - Add comments explaining the interaction between using consumption of p->scx.slice to determine vtime progress and yielding. v2: - p->scx.dsq_vtime was not initialized on load or across cgroup migrations leading to some tasks being stalled for extended period of time depending on how saturated the machine is. Fixed. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/linux/sched/ext.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 3db7b32b2d1d..9cee193dab19 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -49,12 +49,15 @@ enum scx_dsq_id_flags { }; /* - * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the - * scheduler core and the BPF scheduler. See the documentation for more details. + * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered + * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to + * buffer between the scheduler core and the BPF scheduler. See the + * documentation for more details. */ struct scx_dispatch_q { raw_spinlock_t lock; struct list_head list; /* tasks in dispatch order */ + struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; u64 id; struct rhash_head hash_node; @@ -86,6 +89,11 @@ enum scx_task_state { SCX_TASK_NR_STATES, }; +/* scx_entity.dsq_flags */ +enum scx_ent_dsq_flags { + SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */ +}; + /* * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from * everywhere and the following bits track which kfunc sets are currently @@ -111,13 +119,19 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +struct scx_dsq_node { + struct list_head list; /* dispatch order */ + struct rb_node priq; /* p->scx.dsq_vtime order */ + u32 flags; /* SCX_TASK_DSQ_* flags */ +}; + /* * The following is embedded in task_struct and contains all fields necessary * for a task to be scheduled by SCX. */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct list_head dsq_node; + struct scx_dsq_node dsq_node; /* protected by dsq lock */ u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; @@ -149,6 +163,15 @@ struct sched_ext_entity { */ u64 slice; + /* + * Used to order tasks when dispatching to the vtime-ordered priority + * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime() + * but can also be modified directly by the BPF scheduler. Modifying it + * while a task is queued on a dsq may mangle the ordering and is not + * recommended. + */ + u64 dsq_vtime; + /* * If set, reject future sched_setscheduler(2) calls updating the policy * to %SCHED_EXT with -%EACCES. -- cgit v1.2.3 From fa48e8d2c7b58d242c1db3a09c14f4274e055087 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:21 -1000 Subject: sched_ext: Documentation: scheduler: Document extensible scheduler class Add Documentation/scheduler/sched-ext.rst which gives a high-level overview and pointers to the examples. v6: - Add paragraph explaining debug dump. v5: - Updated to reflect /sys/kernel interface change. Kconfig options added. v4: - README improved, reformatted in markdown and renamed to README.md. 
v3: - Added tools/sched_ext/README. - Dropped _example prefix from scheduler names. v2: - Apply minor edits suggested by Bagas. Caveats section dropped as all of them are addressed. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Bagas Sanjaya --- include/linux/sched/ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 9cee193dab19..fe9a67ffe6b1 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* + * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst + * * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. * Copyright (c) 2022 Tejun Heo * Copyright (c) 2022 David Vernet -- cgit v1.2.3 From 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - 
Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + 
DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). 
*/ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From 01e0cfc994be484ddcb9e121e353e51d8bb837c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 15:28:28 +0200 Subject: drm/xe: Use write-back caching mode for system memory on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement. However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes. Moreover write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction. So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX. Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds. v2: - Update RB- and Ack tags. - Rephrase wording in xe_drm.h (Matt Roper) v3: - Really rephrase wording. Signed-off-by: Thomas Hellström Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Cc: Pallavi Mishra Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Effie Yu Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Jose Souza Cc: Michal Mrozek Cc: # v6.8+ Acked-by: Matthew Auld Acked-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Acked-by: Michal Mrozek Acked-by: Effie Yu #On chat Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com --- include/uapi/drm/xe_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 33544ef78d3e..19619d4952a8 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -783,7 +783,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ -- cgit v1.2.3 From d4af01c3731ff9c6e224d7183f8226a56d72b56c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Jul 2024 14:30:55 -1000 Subject: sched_ext: Take out ->priq and ->flags from scx_dsq_node struct scx_dsq_node contains two data structure nodes to link the containing task to a DSQ and a flags field that is protected by the lock of the associated DSQ. One reason why they are grouped into a struct is to use the type independently as a cursor node when iterating tasks on a DSQ. 
However, when iterating, the cursor only needs to be linked on the FIFO list and the rb_node part ends up inflating the size of the iterator data structure unnecessarily making it potentially too expensive to place it on stack. Take ->priq and ->flags out of scx_dsq_node and put them in sched_ext_entity as ->dsq_priq and ->dsq_flags, respectively. scx_dsq_node is renamed to scx_dsq_list_node and the field names are renamed accordingly. This will help implementing DSQ task iterator that can be allocated on stack. No functional change intended. Signed-off-by: Tejun Heo Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Cc: David Vernet --- include/linux/sched/ext.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index fe9a67ffe6b1..eb9cfd18a923 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -121,10 +121,8 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; -struct scx_dsq_node { - struct list_head list; /* dispatch order */ - struct rb_node priq; /* p->scx.dsq_vtime order */ - u32 flags; /* SCX_TASK_DSQ_* flags */ +struct scx_dsq_list_node { + struct list_head node; }; /* @@ -133,7 +131,9 @@ struct scx_dsq_node { */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct scx_dsq_node dsq_node; /* protected by dsq lock */ + struct scx_dsq_list_node dsq_list; /* dispatch order */ + struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_flags; /* protected by DSQ lock */ u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; -- cgit v1.2.3 From 650ba21b131ed1f8ee57826b2c6295a3be221132 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Jul 2024 14:30:55 -1000 Subject: sched_ext: Implement DSQ iterator DSQs are very opaque in the consumption path. The BPF scheduler has no way of knowing which tasks are being considered and which is picked. This patch adds BPF DSQ iterator. - Allows iterating tasks queued on a DSQ in the dispatch order or reverse from anywhere using bpf_for_each(scx_dsq) or calling the iterator kfuncs directly. - Has ordering guarantee where only tasks which were already queued when the iteration started are visible and consumable during the iteration. v5: - Add a comment to the naked list_empty(&dsq->list) test in consume_dispatch_q() to explain the reasoning behind the lockless test and by extension why nldsq_next_task() isn't used there. - scx_qmap changes separated into its own patch. v4: - bpf_iter_scx_dsq_new() declaration in common.bpf.h was using the wrong type for the last argument (bool rev instead of u64 flags). Fix it. v3: - Alexei pointed out that the iterator is too big to allocate on stack. Added a prep patch to reduce the size of the cursor. Now bpf_iter_scx_dsq is 48 bytes and bpf_iter_scx_dsq_kern is 40 bytes on 64bit. - u32_before() comparison factored out. v2: - scx_bpf_consume_task() is separated out into a separate patch. - DSQ seq and iter flags don't need to be u64. Use u32. 
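For illustration only (not part of this patch): a minimal sketch of how a BPF scheduler might walk a user DSQ with the new iterator from its dispatch callback. MY_DSQ_ID and myscx_dispatch are hypothetical names, and the sketch assumes the scx common BPF headers that provide BPF_STRUCT_OPS() and the bpf_for_each(scx_dsq, ...) wrapper around the iterator kfuncs.

#include <scx/common.bpf.h>	/* assumed: provides bpf_for_each(scx_dsq, ...) */

#define MY_DSQ_ID	0	/* hypothetical DSQ created earlier with scx_bpf_create_dsq() */

void BPF_STRUCT_OPS(myscx_dispatch, s32 cpu, struct task_struct *prev)
{
	struct task_struct *p;
	u64 max_vtime = 0;
	u32 nr = 0;

	/* Only tasks already queued when the iteration started are visible. */
	bpf_for_each(scx_dsq, p, MY_DSQ_ID, 0) {
		if (p->scx.dsq_vtime > max_vtime)
			max_vtime = p->scx.dsq_vtime;
		nr++;
	}

	/* nr and max_vtime can now inform which DSQ to consume from. */
}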
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Alexei Starovoitov Cc: bpf@vger.kernel.org --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index eb9cfd18a923..593d2f4909dd 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -61,6 +61,7 @@ struct scx_dispatch_q { struct list_head list; /* tasks in dispatch order */ struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; + u32 seq; /* used by BPF iter */ u64 id; struct rhash_head hash_node; struct llist_node free_node; @@ -123,6 +124,7 @@ enum scx_kf_mask { struct scx_dsq_list_node { struct list_head node; + bool is_bpf_iter_cursor; }; /* @@ -133,6 +135,7 @@ struct sched_ext_entity { struct scx_dispatch_q *dsq; struct scx_dsq_list_node dsq_list; /* dispatch order */ struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_seq; u32 dsq_flags; /* protected by DSQ lock */ u32 flags; /* protected by rq lock */ u32 weight; -- cgit v1.2.3 From f592e01664b4a57b109fcf6f6916145517f94bd7 Mon Sep 17 00:00:00 2001 From: Sebastian Wick Date: Tue, 2 Jul 2024 16:30:16 +0200 Subject: drm/drm_connector: Document Colorspace property variants The initial idea of the Colorspace prop was that this maps 1:1 to InfoFrames/SDP but KMS does not give user space enough information nor control over the output format to figure out which variants can be used for a given KMS commit. At the same time, properties like Broadcast RGB expect full range quantization range being produced by user space from the CRTC and drivers to convert to the range expected by the sink for the chosen output format, mode, InfoFrames, etc. This change documents the reality of the Colorspace property. The Default variant unfortunately is very much driver specific and not reflected by the EDID. The BT2020 variants are in active use by generic compositors which have expectations from the driver about the conversions it has to do when selecting certain output formats. Everything else is also marked as undefined. Coming up with valid behavior that makes it usable from user space and consistent with other KMS properties for those variants is left as an exercise for whoever wants to use them. v2: * Talk about "pixel operation properties" that user space configures * Mention that user space is responsible for checking the EDID for sink support * Make it clear that drivers can choose between RGB and YCbCr on their own Signed-off-by: Sebastian Wick Reviewed-by: Pekka Paalanen Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240702143017.2429975-1-sebastian.wick@redhat.com --- include/drm/drm_connector.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index c754651044d4..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -471,14 +471,6 @@ enum drm_privacy_screen_status { * * DP definitions come from the DP v2.0 spec * HDMI definitions come from the CTA-861-H spec - * - * A note on YCC and RGB variants: - * - * Since userspace is not aware of the encoding on the wire - * (RGB or YCbCr), drivers are free to pick the appropriate - * variant, regardless of what userspace selects. E.g., if - * BT2020_RGB is selected by userspace a driver will pick - * BT2020_YCC if the encoding on the wire is YUV444 or YUV420. * * @DRM_MODE_COLORIMETRY_DEFAULT: * Driver specific behavior. 
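For illustration only (not part of this patch): a hedged sketch of how a compositor might request BT2020 output on a connector while leaving the RGB vs YCC wire-format choice to the driver, as described above. set_colorspace_bt2020() is a hypothetical name, error handling is trimmed, and only standard libdrm calls are used; the fd is assumed to have DRM_CLIENT_CAP_ATOMIC enabled.

#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_colorspace_bt2020(int fd, uint32_t connector_id)
{
	drmModeObjectProperties *props =
		drmModeObjectGetProperties(fd, connector_id, DRM_MODE_OBJECT_CONNECTOR);
	uint32_t prop_id = 0;
	uint64_t value = 0;
	int ret = -1;

	for (uint32_t i = 0; props && i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		if (prop && !strcmp(prop->name, "Colorspace")) {
			prop_id = prop->prop_id;
			/* Ask for BT2020_RGB; the driver may still put the
			 * YCC variant on the wire depending on the output. */
			for (int j = 0; j < prop->count_enums; j++)
				if (!strcmp(prop->enums[j].name, "BT2020_RGB"))
					value = prop->enums[j].value;
		}
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);

	if (prop_id) {
		drmModeAtomicReq *req = drmModeAtomicAlloc();

		drmModeAtomicAddProperty(req, connector_id, prop_id, value);
		ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
		drmModeAtomicFree(req);
	}
	return ret;
}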
-- cgit v1.2.3 From 6be74ddd0609959b4005f88b6a4d4af678e4a71f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:31:59 +0200 Subject: drm/ttm: Allow TTM LRU list nodes of different types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to handle list unlocking while traversing the LRU list, we want the iterators not only to point to the next position of the list traversal, but to insert themselves as list nodes at that point to work around the fact that the next node might otherwise disappear from the list while the iterator is pointing to it. These list nodes need to be easily distinguishable from other list nodes so that others traversing the list can skip over them. So declare a struct ttm_lru_item, with a struct list_head member and a type enum. This will slightly increase the size of a struct ttm_resource. Changes in previous series: - Update enum ttm_lru_item_type documentation. v3: - Introduce ttm_lru_first_res_or_null() (Christian König, Thomas Hellström) v5: - Update also the TTM test code (Xe CI). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-2-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 54 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69769355139f..1511d91e290d 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -49,6 +49,43 @@ struct io_mapping; struct sg_table; struct scatterlist; +/** + * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses + */ +enum ttm_lru_item_type { + /** @TTM_LRU_RESOURCE: The resource subclass */ + TTM_LRU_RESOURCE, + /** @TTM_LRU_HITCH: The iterator hitch subclass */ + TTM_LRU_HITCH +}; + +/** + * struct ttm_lru_item - The TTM lru list node base class + * @link: The list link + * @type: The subclass type + */ +struct ttm_lru_item { + struct list_head link; + enum ttm_lru_item_type type; +}; + +/** + * ttm_lru_item_init() - initialize a struct ttm_lru_item + * @item: The item to initialize + * @type: The subclass type + */ +static inline void ttm_lru_item_init(struct ttm_lru_item *item, + enum ttm_lru_item_type type) +{ + item->type = type; + INIT_LIST_HEAD(&item->link); +} + +static inline bool ttm_lru_item_is_res(const struct ttm_lru_item *item) +{ + return item->type == TTM_LRU_RESOURCE; +} + struct ttm_resource_manager_func { /** * struct ttm_resource_manager_func member alloc @@ -217,9 +254,21 @@ struct ttm_resource { /** * @lru: Least recently used list, see &ttm_resource_manager.lru */ - struct list_head lru; + struct ttm_lru_item lru; }; +/** + * ttm_lru_item_to_res() - Downcast a struct ttm_lru_item to a struct ttm_resource + * @item: The struct ttm_lru_item to downcast + * + * Return: Pointer to the embedding struct ttm_resource + */ +static inline struct ttm_resource * +ttm_lru_item_to_res(struct ttm_lru_item *item) +{ + return container_of(item, struct ttm_resource, lru); +} + /** * struct ttm_resource_cursor * @@ -393,6 +442,9 @@ ttm_resource_manager_next(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor, struct ttm_resource *res); +struct ttm_resource * +ttm_lru_first_res_or_null(struct list_head *head); + 
/** * ttm_resource_manager_for_each_res - iterate over all resources * @man: the resource manager -- cgit v1.2.3 From 9c62fb62c9f0761eeda8f2a9517e007ff2cdbe9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:00 +0200 Subject: drm/ttm: Slightly clean up LRU list iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the transition to using lru hitches easier, simplify the ttm_resource_manager_next() interface to only take the cursor and reuse ttm_resource_manager_next() functionality from ttm_resource_manager_first(). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-3-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 1511d91e290d..7d81fd5b5b83 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) /** * struct ttm_resource_cursor * + * @man: The resource manager currently being iterated over. + * @cur: The list head the cursor currently points to. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct list_head *cur; unsigned int priority; }; @@ -438,9 +442,7 @@ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor); struct ttm_resource * -ttm_resource_manager_next(struct ttm_resource_manager *man, - struct ttm_resource_cursor *cursor, - struct ttm_resource *res); +ttm_resource_manager_next(struct ttm_resource_cursor *cursor); struct ttm_resource * ttm_lru_first_res_or_null(struct list_head *head); @@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head); */ #define ttm_resource_manager_for_each_res(man, cursor, res) \ for (res = ttm_resource_manager_first(man, cursor); res; \ - res = ttm_resource_manager_next(man, cursor, res)) + res = ttm_resource_manager_next(cursor)) struct ttm_kmap_iter * ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, -- cgit v1.2.3 From 8e9bf0fb10a79aaed37474600948cd33d14aa606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:01 +0200 Subject: drm/ttm: Use LRU hitches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have iterators insert themselves into the list they are iterating over using hitch list nodes. Since only the iterator owner can remove these list nodes from the list, it's safe to unlock the list and when continuing, use them as a starting point. Due to the way LRU bumping works in TTM, newly added items will not be missed, and bumped items will be iterated over a second time before reaching the end of the list. The exception is list with bulk move sublists. When bumping a sublist, a hitch that is part of that sublist will also be moved and we might miss items if restarting from it. This will be addressed in a later patch. Changes in previous series: - Updated ttm_resource_cursor_fini() documentation. v2: - Don't reorder ttm_resource_manager_first() and _next(). (Christian König). 
- Use list_add instead of list_move (Christian König) v3: - Split into two patches, one cleanup, one new functionality (Christian König) - use ttm_resource_cursor_fini_locked() instead of open-coding (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-4-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 7d81fd5b5b83..8fac781f641e 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -273,17 +273,22 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) * struct ttm_resource_cursor * * @man: The resource manager currently being iterated over. - * @cur: The list head the cursor currently points to. + * @hitch: A hitch list node inserted before the next resource + * to iterate over. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { struct ttm_resource_manager *man; - struct list_head *cur; + struct ttm_lru_item hitch; unsigned int priority; }; +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_lru_bulk_move_pos * -- cgit v1.2.3 From 4c44f89c5daee9540cb7428de5d835bd00951350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:02 +0200 Subject: drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To address the problem with hitches moving when bulk move sublists are lru-bumped, register the list cursors with the ttm_lru_bulk_move structure when traversing its list, and when lru-bumping the list, move the cursor hitch to the tail. This also means it's mandatory for drivers to call ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when initializing and finalizing the bulk move structure, so add those calls to the amdgpu- and xe driver. Compared to v1 this is slightly more code but less fragile and hopefully easier to understand. Changes in previous series: - Completely rework the functionality - Avoid a NULL pointer dereference assigning manager->mem_type - Remove some leftover code causing build problems v2: - For hitch bulk tail moves, store the mem_type in the cursor instead of with the manager. v3: - Remove leftover mem_type member from change in v2. 
v6: - Add some lockdep asserts (Matthew Brost) - Avoid NULL pointer dereference (Matthew Brost) - No need to check bo->resource before dereferencing bo->bulk_move (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-5-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_resource.h | 56 ++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 8fac781f641e..571abb4861a6 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -269,26 +269,6 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) return container_of(item, struct ttm_resource, lru); } -/** - * struct ttm_resource_cursor - * - * @man: The resource manager currently being iterated over. - * @hitch: A hitch list node inserted before the next resource - * to iterate over. - * @priority: the current priority - * - * Cursor to iterate over the resources in a manager. - */ -struct ttm_resource_cursor { - struct ttm_resource_manager *man; - struct ttm_lru_item hitch; - unsigned int priority; -}; - -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - -void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); - /** * struct ttm_lru_bulk_move_pos * @@ -304,8 +284,9 @@ struct ttm_lru_bulk_move_pos { /** * struct ttm_lru_bulk_move - * * @pos: first/last lru entry for resources in the each domain/priority + * @cursor_list: The list of cursors currently traversing any of + * the sublists of @pos. Protected by the ttm device's lru_lock. * * Container for the current bulk move state. Should be used with * ttm_lru_bulk_move_init() and ttm_bo_set_bulk_move(). @@ -315,8 +296,39 @@ struct ttm_lru_bulk_move_pos { */ struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos pos[TTM_NUM_MEM_TYPES][TTM_MAX_BO_PRIORITY]; + struct list_head cursor_list; }; +/** + * struct ttm_resource_cursor + * @man: The resource manager currently being iterated over + * @hitch: A hitch list node inserted before the next resource + * to iterate over. + * @bulk_link: A list link for the list of cursors traversing the + * bulk sublist of @bulk. Protected by the ttm device's lru_lock. + * @bulk: Pointer to struct ttm_lru_bulk_move whose subrange @hitch is + * inserted to. NULL if none. Never dereference this pointer since + * the struct ttm_lru_bulk_move object pointed to might have been + * freed. The pointer is only for comparison. + * @mem_type: The memory type of the LRU list being traversed. + * This field is valid iff @bulk != NULL. + * @priority: the current priority + * + * Cursor to iterate over the resources in a manager. + */ +struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct ttm_lru_item hitch; + struct list_head bulk_link; + struct ttm_lru_bulk_move *bulk; + unsigned int mem_type; + unsigned int priority; +}; + +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_kmap_iter_iomap - Specialization for a struct io_mapping + * struct sg_table backed struct ttm_resource. 
@@ -405,6 +417,8 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk); void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk); +void ttm_lru_bulk_move_fini(struct ttm_device *bdev, + struct ttm_lru_bulk_move *bulk); void ttm_resource_add_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); -- cgit v1.2.3 From da966b82bf3d16f89a05732c933a589ec798d3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:03 +0200 Subject: drm/ttm: Provide a generic LRU walker helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan() but building on the restartable TTM LRU functionality. The LRU walker optionally supports locking objects as part of a ww mutex locking transaction, to mimic to some extent the current functionality in ttm. However any -EDEADLK return is converted to -ENOSPC and then to -ENOMEM before reaching the driver, so that the driver will need to backoff and possibly retry without being able to keep the ticket. v3: - Move the helper to core ttm. - Remove the drm_exec usage from it for now, it will be reintroduced later in the series. v4: - Handle the -EALREADY case if ticketlocking. v6: - Some cleanup and added code comments (Matthew Brost) - Clarified the ticketlock in the commit message (Matthew Brost) v7: - Use s64 rather than long for the target and progress (Christian König) - Update documentation to not encourage using pages as a progress measure. (Christian König) - Remove cond_resched(). (Christian König) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-6-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ef0f52f56ebc..21fa9d5964ec 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -194,6 +194,41 @@ struct ttm_operation_ctx { uint64_t bytes_moved; }; +struct ttm_lru_walk; + +/** struct ttm_lru_walk_ops - Operations for a LRU walk. */ +struct ttm_lru_walk_ops { + /** + * process_bo - Process this bo. + * @walk: struct ttm_lru_walk describing the walk. + * @bo: A locked and referenced buffer object. + * + * Return: Negative error code on error, User-defined positive value + * (typically, but not always, size of the processed bo) on success. + * On success, the returned values are summed by the walk and the + * walk exits when its target is met. + * 0 also indicates success, -EBUSY means this bo was skipped. + */ + s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo); +}; + +/** + * struct ttm_lru_walk - Structure describing a LRU walk. + */ +struct ttm_lru_walk { + /** @ops: Pointer to the ops structure. */ + const struct ttm_lru_walk_ops *ops; + /** @ctx: Pointer to the struct ttm_operation_ctx. */ + struct ttm_operation_ctx *ctx; + /** @ticket: The struct ww_acquire_ctx if any. */ + struct ww_acquire_ctx *ticket; + /** @tryock_only: Only use trylock for locking. 
*/ + bool trylock_only; +}; + +s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, + struct ttm_resource_manager *man, s64 target); + /** * ttm_bo_get - reference a struct ttm_buffer_object * -- cgit v1.2.3 From 10efe34dae798c652053d4363871914c478f1475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:04 +0200 Subject: drm/ttm: Use the LRU walker helper for swapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework the TTM swapping to use the LRU walker helper. This helps fixing up the ttm_bo_swapout() interface to be consistent about not requiring any locking. For now mimic the current behaviour of using trylock only. We could be using ticket-locks here but defer that until it's deemed necessary. The TTM swapout functionality is a bit weird anyway since it alternates between memory types without exhausting TTM_PL_SYSTEM first. Intentionally keep pages as the unit of progress since changing that to bytes is an unrelated change that can be done later. v6: - Improve on error code translation in the swapout callback (Matthew Brost). v7: - Use s64 rather than long. - Remove ttm_resource_cursor_fini() since it's no longer used. - Rename ttm_resource_cursor_fini_locked() to ttm_resource_cursor_fini(). - Don't swap out pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-7-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 5 +++-- include/drm/ttm/ttm_resource.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 21fa9d5964ec..eb2692de06b8 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -417,8 +417,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo); -int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - gfp_t gfp_flags); +s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + struct ttm_resource_manager *man, gfp_t gfp_flags, + s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_mem_evict_first(struct ttm_device *bdev, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 571abb4861a6..be034be56ba1 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -325,8 +325,6 @@ struct ttm_resource_cursor { unsigned int priority; }; -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); /** -- cgit v1.2.3 From 3756310e9fe1e0182adac89cedaa98c0eea66675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 17:32:05 +0200 Subject: drm/ttm: Use the LRU walker for eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the LRU walker for eviction. This helps removing a lot of code with weird locking semantics. 
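As an aside, for illustration only (not part of these patches): a hedged sketch of a driver-side reclaim built on the walker helper introduced above. my_process_bo(), my_walk_ops and my_reclaim() are hypothetical names; only the ttm_lru_walk_ops, ttm_lru_walk and ttm_lru_walk_for_evict() shapes come from the hunks shown earlier.

/* Assumes <drm/ttm/ttm_bo.h> and <drm/ttm/ttm_resource.h>. */
static s64 my_process_bo(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
{
	if (bo->pin_count)
		return 0;		/* nothing reclaimed, keep walking */

	/* Evict or otherwise process the locked @bo here. */
	return bo->base.size;		/* progress reported back to the walk */
}

static const struct ttm_lru_walk_ops my_walk_ops = {
	.process_bo = my_process_bo,
};

static s64 my_reclaim(struct ttm_device *bdev, struct ttm_resource_manager *man,
		      struct ttm_operation_ctx *ctx, s64 target)
{
	struct ttm_lru_walk walk = {
		.ops = &my_walk_ops,
		.ctx = ctx,
		.trylock_only = true,	/* no ww ticket: only trylock objects */
	};

	return ttm_lru_walk_for_evict(&walk, bdev, man, target);
}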
The functionality is slightly changed so that when trylocked buffer objects are exhausted, we continue to interleave walks with ticket-locks while there is still progress made. The list walks are not restarted in-between evictions. Also provide a separate ttm_bo_evict_first() function for its single user. The context of that user allows sleeping dma_resv locks. v6: - Various cleanups suggested by Matthew Brost. - Fix error return code of ttm_bo_evict_first(). (Matthew Brost) - Fix an error check that was inverted. (Matthew Brost) v7: - Use s64 rather than long (Christian König) - Early ttm_resource_cursor_fini() in ttm_bo_evict_first(). - Simplify check for bo_moved in ttm_bo_evict_first(). (Christian König) - Don't evict pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-8-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index eb2692de06b8..d1a732d56259 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -422,11 +422,9 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); -int ttm_mem_evict_first(struct ttm_device *bdev, - struct ttm_resource_manager *man, - const struct ttm_place *place, - struct ttm_operation_ctx *ctx, - struct ww_acquire_ctx *ticket); +int ttm_bo_evict_first(struct ttm_device *bdev, + struct ttm_resource_manager *man, + struct ttm_operation_ctx *ctx); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, -- cgit v1.2.3 From 76299a557f36d624ca32500173ad7856e1ad93c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 3 Jul 2024 00:17:21 -0500 Subject: drm: Introduce 'power saving policy' drm property The `power saving policy` DRM property is an optional property that can be added to a connector by a driver. This property is for compositors to indicate intent of policy of whether a driver can use power saving features that may compromise the experience intended by the compositor. 
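For illustration only (not part of this patch): a hedged sketch of how a driver could expose the property on a connector. my_connector_init() is a hypothetical wrapper; the creation helper, the mode_config field and the policy bits appear in the hunks below, and drm_object_attach_property() is the usual way to attach a connector property.

static int my_connector_init(struct drm_device *dev,
			     struct drm_connector *connector)
{
	int ret;

	ret = drm_mode_create_power_saving_policy_property(dev,
					DRM_MODE_POWER_SAVING_POLICY_ALL);
	if (ret)
		return ret;

	/* Start unrestricted; the compositor opts in through the property. */
	drm_object_attach_property(&connector->base,
				   dev->mode_config.power_saving_policy, 0);
	return 0;
}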
Acked-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240703051722.328-2-mario.limonciello@amd.com --- include/drm/drm_connector.h | 2 ++ include/drm/drm_mode_config.h | 5 +++++ include/uapi/drm/drm_mode.h | 7 +++++++ 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e3fa43291f44..5ad735253413 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,6 +2267,8 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); +int drm_mode_create_power_saving_policy_property(struct drm_device *dev, + uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..150f9a3b649f 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,6 +969,11 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; + /** + * @power_saving_policy: bitmask for power saving policy requests. + */ + struct drm_property *power_saving_policy; + const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..880303c2ad97 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,6 +152,13 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ +/* power saving policy options */ +#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ +#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ + +#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ + DRM_MODE_REQUIRE_LOW_LATENCY) + /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.2.3 From 0daf44ea9dccfcdb1fe694e9ea85497b6cc3a065 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:28 +0300 Subject: drm/dp: Add helper to dump an LTTPR PHY descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to dump the DPCD descriptor for an LTTPR PHY. This is based on [1] and [2] moving the helper to DRM core as suggested by Ville. 
[1] https://lore.kernel.org/all/20240703155937.1674856-5-imre.deak@intel.com [2] https://lore.kernel.org/all/20240703155937.1674856-6-imre.deak@intel.com Cc: dri-devel@lists.freedesktop.org Cc: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-6-imre.deak@intel.com --- include/drm/display/drm_dp.h | 4 ++++ include/drm/display/drm_dp_helper.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 0b032faa8cf2..0146ccfe9c2e 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1523,6 +1523,10 @@ enum drm_dp_phy { #define DP_SYMBOL_ERROR_COUNT_LANE2_PHY_REPEATER1 0xf0039 /* 1.3 */ #define DP_SYMBOL_ERROR_COUNT_LANE3_PHY_REPEATER1 0xf003b /* 1.3 */ +#define DP_OUI_PHY_REPEATER1 0xf003d /* 1.3 */ +#define DP_OUI_PHY_REPEATER(dp_phy) \ + DP_LTTPR_REG(dp_phy, DP_OUI_PHY_REPEATER1) + #define __DP_FEC1_BASE 0xf0290 /* 1.4 */ #define __DP_FEC2_BASE 0xf0298 /* 1.4 */ #define DP_FEC_BASE(dp_phy) \ diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 8defcc399f42..f88d78e43443 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -656,6 +656,8 @@ struct drm_dp_desc { int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, bool is_branch); +int drm_dp_dump_lttpr_desc(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy); + /** * enum drm_dp_quirk - Display Port sink/branch device specific quirks * -- cgit v1.2.3 From e8858fc07eb8386b9b6436d3e86eeb4b81e4f660 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 13 Jul 2024 23:00:27 -0700 Subject: Input: msc5000_ts - remove the driver MCS-5000 belongs to the 1st generation of Melfas chips, manufactured in 2000-2007. The driver relies on custom platform data (no DT support) and there never were any users of this driver in the mainline kernel. The commit adding the driver mentioned that the driver was tested on S3C6410 NCP board (with Samsung S3C6410 SoC) but the touchscreen device was never added to the board file. This board was removed in v6.3 in commit 743c8fbb90ca ("ARM: s3c: remove most s3c64xx board support"). Remove the driver since there are no users. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240714060029.1528662-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/mcs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h index fcc6f2a1f5c3..f3b0749f1630 100644 --- a/include/linux/platform_data/mcs.h +++ b/include/linux/platform_data/mcs.h @@ -16,10 +16,6 @@ struct mcs_platform_data { void (*poweron)(bool); void (*cfg_pin)(void); - /* touchscreen */ - unsigned int x_size; - unsigned int y_size; - /* touchkey */ const u32 *keymap; unsigned int keymap_size; -- cgit v1.2.3 From dd29eadee1e8f07bbec57dddf4befbcd3e3c0af7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 13 Jul 2024 23:00:28 -0700 Subject: Input: msc_touchkey - remove the driver MCS-5000/5080 chips belong to the 1st generation of Melfas chips, manufactured in 2000-2007. The driver relies on custom platform data (no DT support) and there never were any users of this driver in the mainline kernel. 
It is likely that the driver was (like mcs5000_ts driver) was tested on S3C6410 NCP board (with Samsung S3C6410 SoC), but the touchkey device was never added to the board file. This board was removed in v6.3 in commit 743c8fbb90ca ("ARM: s3c: remove most s3c64xx board support"). Remove the driver since there are no users. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240714060029.1528662-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/mcs.h | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 include/linux/platform_data/mcs.h (limited to 'include') diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h deleted file mode 100644 index f3b0749f1630..000000000000 --- a/include/linux/platform_data/mcs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * Author: HeungJun Kim - */ - -#ifndef __LINUX_MCS_H -#define __LINUX_MCS_H - -#define MCS_KEY_MAP(v, c) ((((v) & 0xff) << 16) | ((c) & 0xffff)) -#define MCS_KEY_VAL(v) (((v) >> 16) & 0xff) -#define MCS_KEY_CODE(v) ((v) & 0xffff) - -struct mcs_platform_data { - void (*poweron)(bool); - void (*cfg_pin)(void); - - /* touchkey */ - const u32 *keymap; - unsigned int keymap_size; - unsigned int key_maxval; - bool no_autorepeat; -}; - -#endif /* __LINUX_MCS_H */ -- cgit v1.2.3 From 0c60eb0cc320fffbb8b10329d276af14f6f5e6bf Mon Sep 17 00:00:00 2001 From: Kyoungrul Kim Date: Wed, 10 Jul 2024 08:25:20 +0900 Subject: scsi: ufs: core: Check LSDBS cap when !mcq If the user sets use_mcq_mode to 0, the host will try to activate the LSDB mode unconditionally even when the LSDBS of device HCI cap is 1. This makes commands time out and causes device probing to fail. To prevent that problem, check the LSDBS cap when MCQ is not supported. Signed-off-by: Kyoungrul Kim Link: https://lore.kernel.org/r/20240709232520epcms2p8ebdb5c4fccc30a6221390566589bf122@epcms2p8 Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + include/ufs/ufshci.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc3..cac0cdb9a916 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1109,6 +1109,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..9917c7743d80 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; -- cgit v1.2.3 From af22bbe1f4a514c80b89a27252beef033168f4e9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Jul 2024 13:35:48 +0200 Subject: virtio: create admin queues alongside other virtqueues Admin virtqueue is just another virtqueue nothing that special about it. The current implementation treats it somehow separate though in terms of creation and deletion. Unify the admin virtqueue creation and deletion flows to be aligned with the rest of virtqueues, creating it from vp_find_vqs_*() helpers. Let the admin virtqueue to be deleted by vp_del_vqs() as the rest. 
Call vp_find_one_vq_msix() with slow_path argument being "true" to make sure that in case of limited interrupt vectors the config vector is used for admin queue. Signed-off-by: Jiri Pirko Message-Id: <20240716113552.80599-10-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6b..169c7d367fac 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From 4c3b54af907e709609d3d8beca92d65e2f0cfd83 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Jul 2024 13:35:51 +0200 Subject: virtio_pci_modern: use completion instead of busy loop to wait on admin cmd result Currently, the code waits in a busy loop on every admin virtqueue issued command to get a reply. That prevents callers from issuing multiple commands in parallel. To overcome this limitation, introduce a virtqueue event callback for admin virtqueue. For every issued command, use completion mechanism to wait on a reply. In the event callback, trigger the completion is done for every incoming reply. Alongside with that, introduce a spin lock to protect the admin virtqueue operations. Signed-off-by: Jiri Pirko Message-Id: <20240716113552.80599-13-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 96fea920873b..999ff5934392 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** -- cgit v1.2.3 From e1a261ba599eec97e1c5c7760d5c3698fc24e6a6 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 2 Jul 2024 14:26:04 +0200 Subject: printk: Add a short description string to kmsg_dump() kmsg_dump doesn't forward the panic reason string to the kmsg_dumper callback. This patch adds a new struct kmsg_dump_detail, that will hold the reason and description, and pass it to the dump() callback. To avoid updating all kmsg_dump() call, it adds a kmsg_dump_desc() function and a macro for backward compatibility. I've written this for drm_panic, but it can be useful for other kmsg_dumper. It allows to see the panic reason, like "sysrq triggered crash" or "VFS: Unable to mount root fs on xxxx" on the drm panic screen. v2: * Use a struct kmsg_dump_detail to hold the reason and description pointer, for more flexibility if we want to add other parameters. 
(Kees Cook) * Fix powerpc/nvram_64 build, as I didn't update the forward declaration of oops_to_nvram() Signed-off-by: Jocelyn Falempe Acked-by: Petr Mladek Acked-by: Michael Ellerman (powerpc) Acked-by: Kees Cook Link: https://patchwork.freedesktop.org/patch/msgid/20240702122639.248110-1-jfalempe@redhat.com --- include/linux/kmsg_dump.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 906521c2329c..6055fc969877 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -39,6 +39,17 @@ struct kmsg_dump_iter { u64 next_seq; }; +/** + * struct kmsg_dump_detail - kernel crash detail + * @reason: reason for the crash, see kmsg_dump_reason. + * @description: optional short string, to provide additional information. + */ + +struct kmsg_dump_detail { + enum kmsg_dump_reason reason; + const char *description; +}; + /** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) @@ -49,13 +60,13 @@ struct kmsg_dump_iter { */ struct kmsg_dumper { struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + void (*dump)(struct kmsg_dumper *dumper, struct kmsg_dump_detail *detail); enum kmsg_dump_reason max_reason; bool registered; }; #ifdef CONFIG_PRINTK -void kmsg_dump(enum kmsg_dump_reason reason); +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc); bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); @@ -71,7 +82,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else -static inline void kmsg_dump(enum kmsg_dump_reason reason) +static inline void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { } @@ -107,4 +118,9 @@ static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) } #endif +static inline void kmsg_dump(enum kmsg_dump_reason reason) +{ + kmsg_dump_desc(reason, NULL); +} + #endif /* _LINUX_KMSG_DUMP_H */ -- cgit v1.2.3 From 7108b4a589cd6d3a2c1276fd610b3500f46de66a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 10 Jul 2024 15:02:27 -0700 Subject: drm/xe/uapi: Expose SIMD16 EU mask in topology query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly reporting for PVC the number of 16-wide EUs without giving userspace any hint that they were different than for other platforms. Xe2 and later also have 16-wide, but in those cases the reported number would correspond to the 8-wide count. To avoid confusion and make sure the right number is used by userspace depending on the platform, add a new item to the topology query and drop the one that is not available. The new mask reported for both PVC and Xe2 should now match the numbers reported via hwconfig. 
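For illustration only (not part of this patch): a hedged userspace sketch that derives the per-DSS EU count from a GT topology blob already obtained through the device query ioctl. parse_eu_count() is a hypothetical name and the record layout (a num_bytes-sized mask[] following each struct) is assumed to match the existing drm_xe_query_topology_mask uAPI; a device is expected to report either the SIMD8 or the SIMD16 EU mask, not both.

#include <stdint.h>
#include <stddef.h>
#include <drm/xe_drm.h>	/* assumed install path of the xe uAPI header */

static unsigned int parse_eu_count(const void *blob, size_t len, uint16_t gt)
{
	const uint8_t *p = blob, *end = p + len;
	unsigned int eus = 0;

	while (p + sizeof(struct drm_xe_query_topology_mask) <= end) {
		const struct drm_xe_query_topology_mask *topo = (const void *)p;

		if (topo->gt_id == gt &&
		    (topo->type == DRM_XE_TOPO_EU_PER_DSS ||
		     topo->type == DRM_XE_TOPO_SIMD16_EU_PER_DSS)) {
			for (uint32_t i = 0; i < topo->num_bytes; i++)
				eus += __builtin_popcount(topo->mask[i]);
		}
		p += sizeof(*topo) + topo->num_bytes;
	}
	return eus;	/* EUs available per DSS, regardless of EU width */
}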
v2: Use a different topo item with EU type in its name to report the new mask instead of adding the type itself as the item (Matt Roper) Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Mateusz Jablonski Acked-by: Wenbin Lu Acked-by: Effie Yu Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240710220446.2169797-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..29425d7fdc77 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -517,7 +517,14 @@ struct drm_xe_query_gt_list { * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` - * means each DSS has 16 EU. + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. + * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. */ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ @@ -527,6 +534,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_COMPUTE 2 #define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From d20a9f568f99591886ec73b80ff7283486710643 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 17 Jul 2024 10:48:40 +0200 Subject: fbcon: Add an option to disable fbcon in panic This is required to avoid conflict between DRM_PANIC, and fbcon. If a drm device already handle panic with drm_panic, it should set the skip_panic field in fb_info, so that fbcon will stay quiet, and not overwrite the panic_screen. Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240717090102.968152-3-jfalempe@redhat.com --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..865dad03e73e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -510,6 +510,7 @@ struct fb_info { void *par; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool skip_panic; /* Do not write to the fb after a panic */ }; /* This will go away -- cgit v1.2.3 From a838e5dca63d1dc701e63b2b1176943c57485c45 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Mon, 15 Jul 2024 21:05:32 +0800 Subject: quota: remove unneeded return value of register_quota_format The register_quota_format always returns 0, simply remove unneeded return value. 
Link: https://patch.msgid.link/20240715130534.2112678-3-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Reviewed-by: Joseph Qi Signed-off-by: Jan Kara --- include/linux/quota.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 07071e64abf3..89a0d83ddad0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -526,7 +526,7 @@ struct quota_info { const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -int register_quota_format(struct quota_format_type *fmt); +void register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); struct quota_module_name { -- cgit v1.2.3 From d5e79eeba3086a52593b295ac4bf6eddd64d4aad Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Sat, 20 Jul 2024 15:15:42 +0800 Subject: dma-buf: heaps: Deduplicate docs and adopt common format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs for dma_heap_get_name were incorrect, and since they were duplicated in the header they were wrong there too. The docs formatting was inconsistent so I tried to make it more consistent across functions since I'm already in here doing cleanup. Remove multiple unused includes and alphabetize. Signed-off-by: T.J. Mercier Signed-off-by: Yong Wu [Yong: Just add a comment for "priv" to mute build warning] Signed-off-by: Yunfei Dong Link: https://patchwork.freedesktop.org/patch/msgid/20240720071606.27930-5-yunfei.dong@mediatek.com Signed-off-by: Christian König --- include/linux/dma-heap.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 064bad725061..27d15f60950a 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -9,14 +9,13 @@ #ifndef _DMA_HEAPS_H #define _DMA_HEAPS_H -#include #include struct dma_heap; /** * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return struct dma_buf ptr + * @allocate: allocate dmabuf and return struct dma_buf ptr * * allocate returns dmabuf on success, ERR_PTR(-errno) on error. */ @@ -41,28 +40,10 @@ struct dma_heap_export_info { void *priv; }; -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ void *dma_heap_get_drvdata(struct dma_heap *heap); -/** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The char* for the heap name. - */ const char *dma_heap_get_name(struct dma_heap *heap); -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); #endif /* _DMA_HEAPS_H */ -- cgit v1.2.3 From 7214da0ed2220a2b9ad22aa77a5974cdd2a62799 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 14 Jul 2024 23:55:02 +0300 Subject: drm/virtio: Add DRM capset definition Define DRM native context capset in the VirtIO-GPU protocol header. 
Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Clark Reviewed-by: Pierre-Eric Pelloux-Prayer Link: https://patchwork.freedesktop.org/patch/msgid/20240714205502.3409718-1-dmitry.osipenko@collabora.com --- include/uapi/linux/virtio_gpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0e21f3998108..bf2c9cabd207 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -311,6 +311,7 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL2 2 /* 3 is reserved for gfxstream */ #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { -- cgit v1.2.3 From 83b501c1799a96a41e163973e88826253ffadfb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Jul 2024 17:24:14 +0200 Subject: drm/scheduler: remove full_recover from drm_sched_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was basically just another one of amdgpus hacks. The parameter allowed to restart the scheduler without turning fence signaling on again. That this is absolutely not a good idea should be obvious by now since the fences will then just sit there and never signal. While at it cleanup the code a bit. Signed-off-by: Christian König Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240722083816.99685-1-christian.koenig@amd.com --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5acc64954a88..fe8edb917360 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -579,7 +579,7 @@ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad); -void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); +void drm_sched_start(struct drm_gpu_scheduler *sched); void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); void drm_sched_increase_karma(struct drm_sched_job *bad); void drm_sched_reset_karma(struct drm_sched_job *bad); -- cgit v1.2.3 From e06b71b2313a00579ba64a1cc43ad29d64cb8d4c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 21 May 2024 13:22:15 -0400 Subject: drm/amdkfd: allow users to target recommended SDMA engines Certain GPUs have better copy performance over xGMI on specific SDMA engines depending on the source and destination GPU. Allow users to create SDMA queues on these recommended engines. Close to 2x overall performance has been observed with this optimization. 
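A hypothetical userspace sketch of the new uapi from the hunk below: kfd_fd, gpu_id and recommended_engine are placeholders, most mandatory queue fields are elided, and however the recommended engine is discovered, it is passed through the new sdma_engine_id field.

#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Illustration only: request an SDMA queue on a specific, recommended engine. */
static int create_sdma_queue_on(int kfd_fd, __u32 gpu_id, __u32 recommended_engine)
{
	struct kfd_ioctl_create_queue_args args = {0};

	args.gpu_id = gpu_id;
	args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID;
	args.sdma_engine_id = recommended_engine;
	args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
	args.queue_priority = KFD_MAX_QUEUE_PRIORITY / 2;
	/* ring_base_address, ring_size, read/write pointer addresses, ... */

	return ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
}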
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.2.3 From 564429a6bd8d26065b2cccffcaa9485359f74de7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:47 -0400 Subject: KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_* Add "ARCH" to the symbols; shortly, the "prepare" phase will include both the arch-independent step to clear out contents left in the page by the host, and the arch-dependent step enabled by CONFIG_HAVE_KVM_GMEM_PREPARE. For consistency do the same for CONFIG_HAVE_KVM_GMEM_INVALIDATE as well. Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a7..344d90771844 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2445,7 +2445,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif @@ -2477,7 +2477,7 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif -- cgit v1.2.3 From 7239ed74677af143857d1a96d402476446a0995a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:51 -0400 Subject: KVM: remove kvm_arch_gmem_prepare_needed() It is enough to return 0 if a guest need not do any preparation. This is in fact how sev_gmem_prepare() works for non-SNP guests, and it extends naturally to Intel hosts: the x86 callback for gmem_prepare is optional and returns 0 if not defined. 
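Illustrative only, with invented helper names: after this change an architecture expresses "nothing to prepare" by returning 0 from the hook itself, so no separate predicate is needed.

#include <linux/kvm_host.h>

int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
	/* guest_needs_gmem_prepare() and arch_prepare_private_page() are
	 * placeholders, not real kernel symbols. */
	if (!guest_needs_gmem_prepare(kvm))	/* e.g. a non-SNP guest */
		return 0;

	return arch_prepare_private_page(kvm, gfn, pfn, max_order);
}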
Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 344d90771844..45373d42f314 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2447,7 +2447,6 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif /** -- cgit v1.2.3 From 4b5f67120a88c713b82907d55a767693382e9e9d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:54 -0400 Subject: KVM: extend kvm_range_has_memory_attributes() to check subset of attributes While currently there is no other attribute than KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM code such as kvm_mem_is_private() is written to expect their existence. Allow using kvm_range_has_memory_attributes() as a multi-page version of kvm_mem_is_private(), without it breaking later when more attributes are introduced. Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45373d42f314..c223b97df03e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, -- cgit v1.2.3 From e4ee5447927377c55777b73fe497a2455a25f948 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 18:27:55 -0400 Subject: KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns This check is currently performed by sev_gmem_post_populate(), but it applies to all callers of kvm_gmem_populate(): the point of the function is that the memory is being encrypted and some work has to be done on all the gfns in order to encrypt them. Therefore, check the KVM_MEMORY_ATTRIBUTE_PRIVATE attribute prior to invoking the callback, and stop the operation if a shared page is encountered. Because CONFIG_KVM_PRIVATE_MEM in principle does not require attributes, this makes kvm_gmem_populate() depend on CONFIG_KVM_GENERIC_PRIVATE_MEM (which does require them). 
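A sketch of the gate described above, not the exact kernel code: using the mask/attrs form introduced in the previous patch, population is refused unless the gfns are currently marked private (the real code can also stop part-way when it hits a shared page).

#include <linux/kvm_host.h>

/* Sketch only: the kind of check kvm_gmem_populate() performs before
 * invoking post_populate() on a range of gfns. */
static bool gmem_range_is_private(struct kvm *kvm, gfn_t gfn, long npages)
{
	return kvm_range_has_memory_attributes(kvm, gfn, gfn + npages,
					       KVM_MEMORY_ATTRIBUTE_PRIVATE,
					       KVM_MEMORY_ATTRIBUTE_PRIVATE);
}

If this returns false, the populate loop bails out with an error instead of encrypting a shared page.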
Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c223b97df03e..79a6b1a63027 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2449,6 +2449,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2475,6 +2476,7 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); -- cgit v1.2.3 From 0d5040e406d2c4404d26b841c4aa34cec0bf1088 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 25 Jul 2024 16:51:08 -0400 Subject: drm/vblank: add dynamic per-crtc vblank configuration support We would like to be able to enable vblank_disable_immediate unconditionally, however there are a handful of cases where a small off delay is necessary (e.g. with PSR enabled). So, we would like to be able to adjust the vblank off delay and disable imminent values dynamically for a given CRTC. Since, it will allow drivers to apply static screen optimizations more quickly and consequently allow users to benefit more so from the power savings afforded by the aforementioned optimizations, while avoiding issues in cases where an off delay is still warranted. In particular, the PSR case requires a small off delay of 2 frames, otherwise display firmware isn't able to keep up with all of the requests made to amdgpu. So, introduce drm_crtc_vblank_on_config() which is like drm_crtc_vblank_on(), but it allows drivers to specify the vblank CRTC configuration before enabling vblanking support for a given CRTC. Signed-off-by: Hamza Mahfooz Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240725205109.209743-1-hamza.mahfooz@amd.com --- include/drm/drm_device.h | 5 +++-- include/drm/drm_vblank.h | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 63767cf24371..c91f87b5242d 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -213,8 +213,9 @@ struct drm_device { * This can be set to true it the hardware has a working vblank counter * with high-precision timestamping (otherwise there are races) and the * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. + * appropriately. Also, see @max_vblank_count, + * &drm_crtc_funcs.get_vblank_counter and + * &drm_vblank_crtc_config.disable_immediate. */ bool vblank_disable_immediate; diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index c8f829b4307c..151ab1e85b1b 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -78,6 +78,31 @@ struct drm_pending_vblank_event { } event; }; +/** + * struct drm_vblank_crtc_config - vblank configuration for a CRTC + */ +struct drm_vblank_crtc_config { + /** + * @offdelay_ms: Vblank off delay in ms, used to determine how long + * &drm_vblank_crtc.disable_timer waits before disabling. 
+ * + * Defaults to the value of drm_vblank_offdelay in drm_crtc_vblank_on(). + */ + int offdelay_ms; + + /** + * @disable_immediate: See &drm_device.vblank_disable_immediate + * for the exact semantics of immediate vblank disabling. + * + * Additionally, this tracks the disable immediate value per crtc, just + * in case it needs to differ from the default value for a given device. + * + * Defaults to the value of &drm_device.vblank_disable_immediate in + * drm_crtc_vblank_on(). + */ + bool disable_immediate; +}; + /** * struct drm_vblank_crtc - vblank tracking for a CRTC * @@ -99,8 +124,8 @@ struct drm_vblank_crtc { wait_queue_head_t queue; /** * @disable_timer: Disable timer for the delayed vblank disabling - * hysteresis logic. Vblank disabling is controlled through the - * drm_vblank_offdelay module option and the setting of the + * hysteresis logic. Vblank disabling is controlled through + * &drm_vblank_crtc_config.offdelay_ms and the setting of the * &drm_device.max_vblank_count value. */ struct timer_list disable_timer; @@ -198,6 +223,12 @@ struct drm_vblank_crtc { */ struct drm_display_mode hwmode; + /** + * @config: Stores vblank configuration values for a given CRTC. + * Also, see drm_crtc_vblank_on_config(). + */ + struct drm_vblank_crtc_config config; + /** * @enabled: Tracks the enabling state of the corresponding &drm_crtc to * avoid double-disabling and hence corrupting saved state. Needed by @@ -247,6 +278,8 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe); void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config); void drm_crtc_vblank_on(struct drm_crtc *crtc); u64 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc); void drm_crtc_vblank_restore(struct drm_crtc *crtc); -- cgit v1.2.3 From 23a55f4492fcf868d068da31a2cd30c15f46207d Mon Sep 17 00:00:00 2001 From: Ofir Gal Date: Thu, 18 Jul 2024 11:45:12 +0300 Subject: net: introduce helper sendpages_ok() Network drivers are using sendpage_ok() to check the first page of an iterator in order to disable MSG_SPLICE_PAGES. The iterator can represent list of contiguous pages. When MSG_SPLICE_PAGES is enabled skb_splice_from_iter() is being used, it requires all pages in the iterator to be sendable. Therefore it needs to check that each page is sendable. The patch introduces a helper sendpages_ok(), it returns true if all the contiguous pages are sendable. Drivers who want to send contiguous pages with MSG_SPLICE_PAGES may use this helper to check whether the page list is OK. If the helper does not return true, the driver should remove MSG_SPLICE_PAGES flag. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Ofir Gal Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240718084515.3833733-2-ofir.gal@volumez.com Signed-off-by: Jens Axboe --- include/linux/net.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 688320b79fcc..b75bc534c1b3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -322,6 +322,25 @@ static inline bool sendpage_ok(struct page *page) return !PageSlab(page) && page_count(page) >= 1; } +/* + * Check sendpage_ok on contiguous pages. 
+ */ +static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) +{ + struct page *p = page + (offset >> PAGE_SHIFT); + size_t count = 0; + + while (count < len) { + if (!sendpage_ok(p)) + return false; + + p++; + count += PAGE_SIZE; + } + + return true; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, -- cgit v1.2.3 From 7543ae2269a855683a39af57048035f44bc8ef9c Mon Sep 17 00:00:00 2001 From: Wouter Verhelst Date: Thu, 25 Jul 2024 18:45:36 +0200 Subject: nbd: add support for rotational devices The NBD protocol defines the flag NBD_FLAG_ROTATIONAL to flag that the export in use should be treated as a rotational device. Add support for that flag to the kernel driver. Signed-off-by: Wouter Verhelst Reviewed-by: Eric Blake Link: https://lore.kernel.org/r/20240725164536.1275851-1-w@uter.be Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 80ce0ef43afd..d75215f2c675 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -51,8 +51,9 @@ enum { #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ #define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */ -/* there is a gap here to match userspace */ +#define NBD_FLAG_ROTATIONAL (1 << 4) /* device is rotational */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +/* there is a gap here to match userspace */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ /* values for cmd flags in the upper 16 bits of request type */ -- cgit v1.2.3 From 1a251f52cfdc417c84411a056bc142cbd77baef4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jul 2024 15:49:18 -0700 Subject: minmax: make generic MIN() and MAX() macros available everywhere This just standardizes the use of MIN() and MAX() macros, with the very traditional semantics. The goal is to use these for C constant expressions and for top-level / static initializers, and so be able to simplify the min()/max() macros. These macro names were used by various kernel code - they are very traditional, after all - and all such users have been fixed up, with a few different approaches: - trivial duplicated macro definitions have been removed Note that 'trivial' here means that it's obviously kernel code that already included all the major kernel headers, and thus gets the new generic MIN/MAX macros automatically. - non-trivial duplicated macro definitions are guarded with #ifndef This is the "yes, they define their own versions, but no, the include situation is not entirely obvious, and maybe they don't get the generic version automatically" case. - strange use case #1 A couple of drivers decided that the way they want to describe their versioning is with #define MAJ 1 #define MIN 2 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) which adds zero value and I just did my Alexander the Great impersonation, and rewrote that pointless Gordian knot as #define DRV_VERSION "1.2" instead. - strange use case #2 A couple of drivers thought that it's a good idea to have a random 'MIN' or 'MAX' define for a value or index into a table, rather than the traditional macro that takes arguments. These values were re-written as C enum's instead. 
The new function-like macros only expand when followed by an open parenthesis, and thus don't clash with enum use. Happily, there weren't really all that many of these cases, and a lot of users already had the pattern of using '#ifndef' guarding (or in one case just using '#undef MIN') before defining their own private version that does the same thing. I left such cases alone. Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 9c2848abc804..fc384714da45 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -277,6 +277,8 @@ static inline bool in_range32(u32 val, u32 start, u32 len) * Use these carefully: no type checking, and uses the arguments * multiple times. Use for obvious constants only. */ +#define MIN(a,b) __cmp(min,a,b) +#define MAX(a,b) __cmp(max,a,b) #define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) #define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) -- cgit v1.2.3 From aaa5e1aa39074fb466f6ef3df4de6903741dfeec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 26 Jul 2024 17:52:36 +0200 Subject: ASoC: Use __counted_by() annotation for snd_soc_pcm_runtime The struct snd_soc_pcm_runtime has a flex array of snd_soc_component objects at its end, and the size is kept in the num_components field. We can add the __counted_by() annotation for compiler's assistance to catch array overflows. A slight additional change is the assignment of rtd->components[]; the array counter has to be incremented first to avoid false-positive reports from compilers. Also, the allocation size of snd_soc_pcm_runtime is cleaned up with the standard struct_size() helper, too. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240726155237.21961-1-tiwai@suse.de Signed-off-by: Mark Brown --- include/sound/soc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index a8e66bbf932b..e844f6afc5b5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1209,8 +1209,9 @@ struct snd_soc_pcm_runtime { bool initialized; + /* CPU/Codec/Platform */ int num_components; - struct snd_soc_component *components[]; /* CPU/Codec/Platform */ + struct snd_soc_component *components[] __counted_by(num_components); }; /* see soc_new_pcm_runtime() */ -- cgit v1.2.3 From f58872f45c36ded048bccc22701b0986019c24d8 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Fri, 12 Jul 2024 16:20:36 -0300 Subject: spi: Enable controllers to extend the SPI protocol with MOSI idle configuration The behavior of an SPI controller data output line (SDO or MOSI or COPI (Controller Output Peripheral Input) for disambiguation) is usually not specified when the controller is not clocking out data. However, there do exist SPI peripherals that require a specific MOSI line state when data is not being clocked out of the controller. Conventional SPI controllers may set the MOSI line on SCLK edges then bring it low when no data is going out, or leave the line at the state of the last transfer bit. More elaborate controllers are capable of setting the MOSI idle state according to different configurable levels and thus are more suitable for interfacing with demanding peripherals. Add SPI mode bits to allow peripherals to request an explicit MOSI idle state when needed.
When supporting a particular MOSI idle configuration, the data output line state is expected to remain at the configured level when the controller is not clocking out data. When a device that needs a specific MOSI idle state is identified, its driver should request the MOSI idle configuration by setting the proper SPI mode bit. Acked-by: Nuno Sa Reviewed-by: Jonathan Cameron Reviewed-by: David Lechner Tested-by: David Lechner Signed-off-by: Marcelo Schmitt Link: https://patch.msgid.link/9802160b5e5baed7f83ee43ac819cb757a19be55.1720810545.git.marcelo.schmitt@analog.com Signed-off-by: Mark Brown --- include/uapi/linux/spi/spi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/spi/spi.h b/include/uapi/linux/spi/spi.h index ca56e477d161..ee4ac812b8f8 100644 --- a/include/uapi/linux/spi/spi.h +++ b/include/uapi/linux/spi/spi.h @@ -28,7 +28,8 @@ #define SPI_RX_OCTAL _BITUL(14) /* receive with 8 wires */ #define SPI_3WIRE_HIZ _BITUL(15) /* high impedance turnaround */ #define SPI_RX_CPHA_FLIP _BITUL(16) /* flip CPHA on Rx only xfer */ -#define SPI_MOSI_IDLE_LOW _BITUL(17) /* leave mosi line low when idle */ +#define SPI_MOSI_IDLE_LOW _BITUL(17) /* leave MOSI line low when idle */ +#define SPI_MOSI_IDLE_HIGH _BITUL(18) /* leave MOSI line high when idle */ /* * All the bits defined above should be covered by SPI_MODE_USER_MASK. @@ -38,6 +39,6 @@ * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to increase the bit index below as well. */ -#define SPI_MODE_USER_MASK (_BITUL(18) - 1) +#define SPI_MODE_USER_MASK (_BITUL(19) - 1) #endif /* _UAPI_SPI_H */ -- cgit v1.2.3 From 320f6693097bf89d67f9cabad24a2b911e23073f Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Fri, 12 Jul 2024 16:21:03 -0300 Subject: spi: bitbang: Implement support for MOSI idle state configuration Some SPI peripherals may require strict MOSI line state when the controller is not clocking out data. Implement support for MOSI idle state configuration (low or high) by setting the data output line level on controller setup and after transfers. Bitbang operations now call controller specific set_mosi_idle() callback to set MOSI to its idle state. The MOSI line is kept at its idle state if no tx buffer is provided. Acked-by: Nuno Sa Reviewed-by: David Lechner Reviewed-by: Jonathan Cameron Signed-off-by: Marcelo Schmitt Link: https://patch.msgid.link/de61a600b56ed9cb714d5ea87afa88948e70041e.1720810545.git.marcelo.schmitt@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index d4cb83195f7a..c92cd43a47f4 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -24,6 +24,7 @@ struct spi_bitbang { #define BITBANG_CS_ACTIVE 1 /* normally nCS, active low */ #define BITBANG_CS_INACTIVE 0 + void (*set_mosi_idle)(struct spi_device *spi); /* txrx_bufs() may handle dma mapping for transfers that don't * already have one (transfer.{tx,rx}_dma is zero), or use PIO */ -- cgit v1.2.3 From d65d411c9259a2499081e1e7ed91088232666b57 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Jul 2023 12:08:50 +0100 Subject: treewide: context_tracking: Rename CONTEXT_* into CT_STATE_* Context tracking state related symbols currently use a mix of the CONTEXT_ (e.g. CONTEXT_KERNEL) and CT_SATE_ (e.g. CT_STATE_MASK) prefixes. 
Clean up the naming and make the ctx_state enum use the CT_STATE_ prefix. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Acked-by: Frederic Weisbecker Acked-by: Thomas Gleixner Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 16 ++++++++-------- include/linux/context_tracking_state.h | 20 ++++++++++---------- include/linux/entry-common.h | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6e76b9dba00e..28fcfa184903 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -26,26 +26,26 @@ extern void user_exit_callable(void); static inline void user_enter(void) { if (context_tracking_enabled()) - ct_user_enter(CONTEXT_USER); + ct_user_enter(CT_STATE_USER); } static inline void user_exit(void) { if (context_tracking_enabled()) - ct_user_exit(CONTEXT_USER); + ct_user_exit(CT_STATE_USER); } /* Called with interrupts disabled. */ static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_USER); + __ct_user_enter(CT_STATE_USER); } static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_USER); + __ct_user_exit(CT_STATE_USER); } static inline enum ctx_state exception_enter(void) @@ -57,7 +57,7 @@ static inline enum ctx_state exception_enter(void) return 0; prev_ctx = __ct_state(); - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_exit(prev_ctx); return prev_ctx; @@ -67,7 +67,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) && context_tracking_enabled()) { - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_enter(prev_ctx); } } @@ -75,7 +75,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) static __always_inline bool context_tracking_guest_enter(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_GUEST); + __ct_user_enter(CT_STATE_GUEST); return context_tracking_enabled_this_cpu(); } @@ -83,7 +83,7 @@ static __always_inline bool context_tracking_guest_enter(void) static __always_inline void context_tracking_guest_exit(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_GUEST); + __ct_user_exit(CT_STATE_GUEST); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index bbff5f7f8803..f1c53125edee 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -10,18 +10,18 @@ #define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) enum ctx_state { - CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ - CONTEXT_KERNEL = 0, - CONTEXT_IDLE = 1, - CONTEXT_USER = 2, - CONTEXT_GUEST = 3, - CONTEXT_MAX = 4, + CT_STATE_DISABLED = -1, /* returned by ct_state() if unknown */ + CT_STATE_KERNEL = 0, + CT_STATE_IDLE = 1, + CT_STATE_USER = 2, + CT_STATE_GUEST = 3, + CT_STATE_MAX = 4, }; /* Even value for idle, else odd. 
*/ -#define RCU_DYNTICKS_IDX CONTEXT_MAX +#define RCU_DYNTICKS_IDX CT_STATE_MAX -#define CT_STATE_MASK (CONTEXT_MAX - 1) +#define CT_STATE_MASK (CT_STATE_MAX - 1) #define CT_DYNTICKS_MASK (~CT_STATE_MASK) struct context_tracking { @@ -123,14 +123,14 @@ static inline bool context_tracking_enabled_this_cpu(void) * * Returns the current cpu's context tracking state if context tracking * is enabled. If context tracking is disabled, returns - * CONTEXT_DISABLED. This should be used primarily for debugging. + * CT_STATE_DISABLED. This should be used primarily for debugging. */ static __always_inline int ct_state(void) { int ret; if (!context_tracking_enabled()) - return CONTEXT_DISABLED; + return CT_STATE_DISABLED; preempt_disable(); ret = __ct_state(); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b0fb775a600d..1e50cdb83ae5 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -108,7 +108,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(__ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CT_STATE_USER); user_exit_irqoff(); instrumentation_begin(); -- cgit v1.2.3 From 4aa35e0db6d758e8f952ed30d7ac3117c376562c Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Jul 2023 12:19:01 +0100 Subject: context_tracking, rcu: Rename RCU_DYNTICKS_IDX into CT_RCU_WATCHING The symbols relating to the CT_STATE part of context_tracking.state are now all prefixed with CT_STATE. The RCU dynticks counter part of that atomic variable still involves symbols with different prefixes, align them all to be prefixed with CT_RCU_WATCHING. Suggested-by: "Paul E. McKenney" Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Acked-by: Thomas Gleixner Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 6 +++--- include/linux/context_tracking_state.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 28fcfa184903..a6c36780cc3b 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -119,7 +119,7 @@ extern void ct_idle_exit(void); */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX); + return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING); } /* @@ -142,7 +142,7 @@ static __always_inline bool warn_rcu_enter(void) preempt_disable_notrace(); if (rcu_dynticks_curr_cpu_in_eqs()) { ret = true; - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); } return ret; @@ -151,7 +151,7 @@ static __always_inline bool warn_rcu_enter(void) static __always_inline void warn_rcu_exit(bool rcu) { if (rcu) - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); preempt_enable_notrace(); } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index f1c53125edee..94d6a935af3b 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -18,11 +18,11 @@ enum ctx_state { CT_STATE_MAX = 4, }; -/* Even value for idle, else odd. */ -#define RCU_DYNTICKS_IDX CT_STATE_MAX +/* Odd value for watching, else even. 
*/ +#define CT_RCU_WATCHING CT_STATE_MAX #define CT_STATE_MASK (CT_STATE_MAX - 1) -#define CT_DYNTICKS_MASK (~CT_STATE_MASK) +#define CT_RCU_WATCHING_MASK (~CT_STATE_MASK) struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -58,21 +58,21 @@ static __always_inline int __ct_state(void) #ifdef CONFIG_CONTEXT_TRACKING_IDLE static __always_inline int ct_dynticks(void) { - return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK; + return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } static __always_inline int ct_dynticks_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read(&ct->state) & CT_RCU_WATCHING_MASK; } static __always_inline int ct_dynticks_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read_acquire(&ct->state) & CT_RCU_WATCHING_MASK; } static __always_inline long ct_dynticks_nesting(void) -- cgit v1.2.3 From a4a7921ec08d016f9d692752e2f82f66369c7ffa Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:45:56 +0200 Subject: context_tracking, rcu: Rename ct_dynticks() into ct_rcu_watching() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 94d6a935af3b..cb90d8c17810 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -56,7 +56,7 @@ static __always_inline int __ct_state(void) #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE -static __always_inline int ct_dynticks(void) +static __always_inline int ct_rcu_watching(void) { return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } -- cgit v1.2.3 From a9fde9d1a5dd42edadf61cf4e64aa234b4c5bd3f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:47:14 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_cpu() into ct_rcu_watching_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. 
Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index cb90d8c17810..ad5a06a42b4a 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -61,7 +61,7 @@ static __always_inline int ct_rcu_watching(void) return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu(int cpu) +static __always_inline int ct_rcu_watching_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From 125716c393889f9035567d4d78da239483f63ece Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:47:12 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_cpu_acquire() into ct_rcu_watching_cpu_acquire() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ad5a06a42b4a..ad6570ffeff3 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -68,7 +68,7 @@ static __always_inline int ct_rcu_watching_cpu(int cpu) return atomic_read(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu_acquire(int cpu) +static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From 7aeba709a048d870c15940af8b620b16281c3b9e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:42 +0200 Subject: rcu/nocb: Introduce RCU_NOCB_LOCKDEP_WARN() Checking for races against concurrent (de-)offloading implies the creation of !CONFIG_RCU_NOCB_CPU stubs to check if each relevant lock is held. For now this only implies the nocb_lock but more are to be expected. Create instead a NOCB specific version of RCU_LOCKDEP_WARN() to avoid the proliferation of stubs. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..d48d3c237305 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -144,11 +144,18 @@ void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); int rcu_nocb_cpu_deoffload(int cpu); void rcu_nocb_flush_deferred_wakeup(void); + +#define RCU_NOCB_LOCKDEP_WARN(c, s) RCU_LOCKDEP_WARN(c, s) + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ + static inline void rcu_init_nohz(void) { } static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; } static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } + +#define RCU_NOCB_LOCKDEP_WARN(c, s) + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* -- cgit v1.2.3 From bae6076ebbd14cc1f1bd4de65c2db21d3ab109d7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:50 +0200 Subject: rcu/nocb: Remove SEGCBLIST_RCU_CORE RCU core can't be running anymore while in the middle of (de-)offloading since this sort of transition now only applies to offline CPUs. The SEGCBLIST_RCU_CORE state can therefore be removed. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ba95c06675e1..5469c54cd778 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -185,11 +185,10 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_RCU_CORE BIT(1) -#define SEGCBLIST_LOCKING BIT(2) -#define SEGCBLIST_KTHREAD_CB BIT(3) -#define SEGCBLIST_KTHREAD_GP BIT(4) -#define SEGCBLIST_OFFLOADED BIT(5) +#define SEGCBLIST_LOCKING BIT(1) +#define SEGCBLIST_KTHREAD_CB BIT(2) +#define SEGCBLIST_KTHREAD_GP BIT(3) +#define SEGCBLIST_OFFLOADED BIT(4) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From 91e43b9044a4f9b6976dc28b3f1446b733cc68de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 May 2024 15:45:51 +0200 Subject: rcu/nocb: Remove SEGCBLIST_KTHREAD_CB This state excerpt from the (de-)offloading state machine was used to implement an ad-hoc kthread parking of rcuo kthreads. This code has been removed and therefore the related state can be erased as well. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 5469c54cd778..1ef1bb54853d 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -186,9 +186,8 @@ struct rcu_cblist { */ #define SEGCBLIST_ENABLED BIT(0) #define SEGCBLIST_LOCKING BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_KTHREAD_GP BIT(2) +#define SEGCBLIST_OFFLOADED BIT(3) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From dc1c8034e31b14a2e5e212104ec508aec44ce1b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jul 2024 20:24:12 -0700 Subject: minmax: simplify min()/max()/clamp() implementation Now that we no longer have any C constant expression contexts (ie array size declarations or static initializers) that use min() or max(), we can simpify the implementation by not having to worry about the result staying as a C constant expression. So now we can unconditionally just use temporary variables of the right type, and get rid of the excessive expansion that used to come from the use of __builtin_choose_expr(__is_constexpr(...), .. to pick the specialized code for constant expressions. Another expansion simplification is to pass the temporary variables (in addition to the original expression) to our __types_ok() macro. That may superficially look like it complicates the macro, but when we only want the type of the expression, expanding the temporary variable names is much simpler and smaller than expanding the potentially complicated original expression. As a result, on my machine, doing a $ time make drivers/staging/media/atomisp/pci/isp/kernels/ynr/ynr_1.0/ia_css_ynr.host.i goes from real 0m16.621s user 0m15.360s sys 0m1.221s to real 0m2.532s user 0m2.091s sys 0m0.452s because the token expansion goes down dramatically. In particular, the longest line expansion (which was line 71 of that 'ia_css_ynr.host.c' file) shrinks from 23,338kB (yes, 23MB for one single line) to "just" 1,444kB (now "only" 1.4MB). And yes, that line is still the line from hell, because it's doing multiple levels of "min()/max()" expansion thanks to some of them being hidden inside the uDIGIT_FITTING() macro. Lorenzo has a nice cleanup patch that makes that driver use inline functions instead of macros for sDIGIT_FITTING() and uDIGIT_FITTING(), which will fix that line once and for all, but the 16-fold reduction in this case does show why we need to simplify these helpers. 
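A user-space analogue of the resulting shape, with illustrative names rather than the kernel's: each argument is evaluated exactly once into an __auto_type temporary and only the temporaries are compared, so no separate constant-expression branch is needed and arguments with side effects stay safe.

#define my_min(x, y) ({			\
	__auto_type ux_ = (x);		\
	__auto_type uy_ = (y);		\
	ux_ < uy_ ? ux_ : uy_;		\
})

/* my_min(i++, limit) increments i exactly once, and the expansion stays
 * small even when the arguments are themselves macro-heavy expressions. */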
Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index fc384714da45..e3e4353df983 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -35,10 +35,10 @@ #define __is_noneg_int(x) \ (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __types_ok(x, y) \ - (__is_signed(x) == __is_signed(y) || \ - __is_signed((x) + 0) == __is_signed((y) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok(x, y, ux, uy) \ + (__is_signed(ux) == __is_signed(uy) || \ + __is_signed((ux) + 0) == __is_signed((uy) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) #define __cmp_op_min < #define __cmp_op_max > @@ -51,34 +51,31 @@ #define __cmp_once(op, type, x, y) \ __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) -#define __careful_cmp_once(op, x, y) ({ \ - static_assert(__types_ok(x, y), \ +#define __careful_cmp_once(op, x, y, ux, uy) ({ \ + __auto_type ux = (x); __auto_type uy = (y); \ + static_assert(__types_ok(x, y, ux, uy), \ #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp_once(op, __auto_type, x, y); }) + __cmp(op, ux, uy); }) -#define __careful_cmp(op, x, y) \ - __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), __careful_cmp_once(op, x, y)) +#define __careful_cmp(op, x, y) \ + __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ +#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ + __auto_type uval = (val); \ + __auto_type ulo = (lo); \ + __auto_type uhi = (hi); \ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __careful_clamp(val, lo, hi) ({ \ - __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ - __clamp(val, lo, hi), \ - __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ - __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) + static_assert(__types_ok(uval, lo, uval, ulo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(uval, hi, uval, uhi), "clamp() 'hi' signedness error"); \ + __clamp(uval, ulo, uhi); }) + +#define __careful_clamp(val, lo, hi) \ + __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) /** * min - return minimum of two values of the same or compatible types -- cgit v1.2.3 From 59c34008d3bdeef4c8ebc0ed2426109b474334d4 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 22 Jul 2024 14:58:01 +0530 Subject: x86/amd_nb: Add new PCI IDs for AMD family 1Ah model 60h Add new PCI device IDs into the root IDs and miscellaneous IDs lists to provide support for the latest generation of AMD 1Ah family 60h processor models. 
Signed-off-by: Shyam Sundar S K Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240722092801.3480266-1-Shyam-sundar.S-k@amd.com --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..91182aa1d2ec 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -580,6 +580,7 @@ #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb +#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3 0x124b #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 #define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b -- cgit v1.2.3 From 8423895d456672736a278d64f86d8b434eef2b54 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 14 May 2024 15:08:58 +0200 Subject: video: Handle HAS_IOPORT dependencies In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at compile time. We thus need to #ifdef functions and their callsites which unconditionally use these I/O accessors. In the include/video/vga.h these are conveniently all those functions with the vga_io_* prefix. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Signed-off-by: Helge Deller --- include/video/vga.h | 58 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index 947c0abd04ef..468764d6727a 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -197,9 +197,26 @@ struct vgastate { extern int save_vga(struct vgastate *state); extern int restore_vga(struct vgastate *state); +static inline unsigned char vga_mm_r (void __iomem *regbase, unsigned short port) +{ + return readb (regbase + port); +} + +static inline void vga_mm_w (void __iomem *regbase, unsigned short port, unsigned char val) +{ + writeb (val, regbase + port); +} + +static inline void vga_mm_w_fast (void __iomem *regbase, unsigned short port, + unsigned char reg, unsigned char val) +{ + writew (VGA_OUT16VAL (val, reg), regbase + port); +} + /* * generic VGA port read/write */ +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_r (unsigned short port) { @@ -217,22 +234,6 @@ static inline void vga_io_w_fast (unsigned short port, unsigned char reg, outw(VGA_OUT16VAL (val, reg), port); } -static inline unsigned char vga_mm_r (void __iomem *regbase, unsigned short port) -{ - return readb (regbase + port); -} - -static inline void vga_mm_w (void __iomem *regbase, unsigned short port, unsigned char val) -{ - writeb (val, regbase + port); -} - -static inline void vga_mm_w_fast (void __iomem *regbase, unsigned short port, - unsigned char reg, unsigned char val) -{ - writew (VGA_OUT16VAL (val, reg), regbase + port); -} - static inline unsigned char vga_r (void __iomem *regbase, unsigned short port) { if (regbase) @@ -258,8 +259,25 @@ static inline void vga_w_fast (void __iomem *regbase, unsigned short port, else vga_io_w_fast (port, reg, val); } +#else /* CONFIG_HAS_IOPORT */ +static inline unsigned char vga_r (void __iomem *regbase, unsigned short port) +{ + return vga_mm_r (regbase, port); +} + +static inline void vga_w (void __iomem *regbase, unsigned short port, unsigned char val) +{ + vga_mm_w (regbase, port, val); +} +static inline void vga_w_fast (void __iomem *regbase, unsigned short port, + unsigned char reg, 
unsigned char val) +{ + vga_mm_w_fast (regbase, port, reg, val); +} +#endif /* CONFIG_HAS_IOPORT */ + /* * VGA CRTC register read/write */ @@ -280,6 +298,7 @@ static inline void vga_wcrt (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rcrt (unsigned char reg) { vga_io_w (VGA_CRT_IC, reg); @@ -295,6 +314,7 @@ static inline void vga_io_wcrt (unsigned char reg, unsigned char val) vga_io_w (VGA_CRT_DC, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rcrt (void __iomem *regbase, unsigned char reg) { @@ -333,6 +353,7 @@ static inline void vga_wseq (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rseq (unsigned char reg) { vga_io_w (VGA_SEQ_I, reg); @@ -348,6 +369,7 @@ static inline void vga_io_wseq (unsigned char reg, unsigned char val) vga_io_w (VGA_SEQ_D, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rseq (void __iomem *regbase, unsigned char reg) { @@ -385,6 +407,7 @@ static inline void vga_wgfx (void __iomem *regbase, unsigned char reg, unsigned #endif /* VGA_OUTW_WRITE */ } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rgfx (unsigned char reg) { vga_io_w (VGA_GFX_I, reg); @@ -400,6 +423,7 @@ static inline void vga_io_wgfx (unsigned char reg, unsigned char val) vga_io_w (VGA_GFX_D, val); #endif /* VGA_OUTW_WRITE */ } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rgfx (void __iomem *regbase, unsigned char reg) { @@ -434,6 +458,7 @@ static inline void vga_wattr (void __iomem *regbase, unsigned char reg, unsigned vga_w (regbase, VGA_ATT_W, val); } +#ifdef CONFIG_HAS_IOPORT static inline unsigned char vga_io_rattr (unsigned char reg) { vga_io_w (VGA_ATT_IW, reg); @@ -445,6 +470,7 @@ static inline void vga_io_wattr (unsigned char reg, unsigned char val) vga_io_w (VGA_ATT_IW, reg); vga_io_w (VGA_ATT_W, val); } +#endif /* CONFIG_HAS_IOPORT */ static inline unsigned char vga_mm_rattr (void __iomem *regbase, unsigned char reg) { -- cgit v1.2.3 From ba386777a30b38dabcc7fb8a89ec2869a09915f7 Mon Sep 17 00:00:00 2001 From: Vignesh Balasubramanian Date: Thu, 25 Jul 2024 21:40:18 +0530 Subject: x86/elf: Add a new FPU buffer layout info to x86 core files Add a new .note section containing type, size, offset and flags of every xfeature that is present. This information will be used by debuggers to understand the XSAVE layout of the machine where the core file has been dumped, and to read XSAVE registers, especially during cross-platform debugging. The XSAVE layouts of modern AMD and Intel CPUs differ, especially since Memory Protection Keys and the AVX-512 features have been inculcated into the AMD CPUs. Since AMD never adopted (and hence never left room in the XSAVE layout for) the Intel MPX feature, tools like GDB had assumed a fixed XSAVE layout matching that of Intel (based on the XCR0 mask). Hence, core dumps from AMD CPUs didn't match the known size for the XCR0 mask. This resulted in GDB and other tools not being able to access the values of the AVX-512 and PKRU registers on AMD CPUs. To solve this, an interim solution has been accepted into GDB, and is already a part of GDB 14, see https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html. 
But it depends on heuristics based on the total XSAVE register set size and the XCR0 mask to infer the layouts of the various register blocks for core dumps, and hence, is not a foolproof mechanism to determine the layout of the XSAVE area. Therefore, add a new core dump note in order to allow GDB/LLDB and other relevant tools to determine the layout of the XSAVE area of the machine where the corefile was dumped. The new core dump note (which is being proposed as a per-process .note section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures. Each structure describes an individual extended feature containing offset, size and flags in this format: struct x86_xfeat_component { u32 type; u32 size; u32 offset; u32 flags; }; and in an independent manner, allowing for future extensions without depending on hw arch specifics like CPUID etc. [ bp: Massage commit message, zap trailing whitespace. ] Co-developed-by: Jini Susan George Signed-off-by: Jini Susan George Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Vignesh Balasubramanian Link: https://lore.kernel.org/r/20240725161017.112111-2-vigbalas@amd.com --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b54b313bcf07..e30a9b47dc87 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -411,6 +411,7 @@ typedef struct elf64_shdr { #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ /* Old binutils treats 0x203 as a CET state */ #define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ #define NT_S390_TIMER 0x301 /* s390 timer register */ #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ -- cgit v1.2.3 From 52c3fb1a0f822fd64529ca64f3792095524de450 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jul 2024 10:21:49 +0200 Subject: perf/x86: Add hw_perf_event::aux_config Start a new section for AUX PMUs in hw_perf_event. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a8942277dda..6bb0c21d6335 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -168,6 +168,9 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; }; + struct { /* aux / Intel-PT */ + u64 aux_config; + }; struct { /* software */ struct hrtimer hrtimer; }; -- cgit v1.2.3 From f23c042ce34ba265cf3129d530702b5d218e3f4b Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 27 May 2024 14:06:47 +0200 Subject: sched/deadline: Comment sched_dl_entity::dl_server variable Add an explanation for the newly added variable. 
Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Tested-by: Juri Lelli Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/147f7aa8cb8fd925f36aa8059af6a35aad08b45a.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..1c771ea4481d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -639,6 +639,8 @@ struct sched_dl_entity { * * @dl_overrun tells if the task asked to be informed about runtime * overruns. + * + * @dl_server tells if this is a server entity. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; -- cgit v1.2.3 From a110a81c52a9de73e2e57e826dd3bf0fd4c22226 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 27 May 2024 14:06:51 +0200 Subject: sched/deadline: Deferrable dl server Among the motivations for the DL servers is the real-time throttling mechanism. This mechanism works by throttling the rt_rq after running for a long period without leaving space for fair tasks. The base dl server avoids this problem by boosting fair tasks instead of throttling the rt_rq. The point is that it boosts without waiting for potential starvation, causing some non-intuitive cases. For example, an IRQ dispatches two tasks on an idle system, a fair one and an RT one. The DL server will be activated, running the fair task before the RT one. This problem can be avoided by deferring the dl server activation. By setting the defer option, the dl_server will dispatch a SCHED_DEADLINE reservation with replenished runtime, but throttled. The dl_timer will be set for the defer time at (period - runtime) ns from the start time, thus boosting the fair rq at the defer time. If the fair scheduler has the opportunity to run while waiting for the defer time, the dl server runtime will be consumed. If the runtime is completely consumed before the defer time, the server will be replenished while still in a throttled state. Then, the dl_timer will be reset to the new defer time. If the fair server reaches the defer time without consuming its runtime, the server will start running, following CBS rules (thus without breaking SCHED_DEADLINE). Then the server will continue in the running state (without deferring) until the fair tasks are able to execute under the regular fair scheduler again (end of the starvation). Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Tested-by: Juri Lelli Link: https://lore.kernel.org/r/dd175943c72533cd9f0b87767c6499204879cc38.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c771ea4481d..4edd7e2096fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -641,12 +641,24 @@ struct sched_dl_entity { * overruns. * * @dl_server tells if this is a server entity. + * + * @dl_defer tells if this is a deferred or regular server. For + * now only defer server exists. + * + * @dl_defer_armed tells if the deferrable server is waiting + * for the replenishment timer to activate it. + * + * @dl_defer_running tells if the deferrable server is actually + * running, skipping the defer phase.
*/ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From c8a85394cfdb4696b4e2f8a0f3066a1c921af426 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 27 May 2024 14:06:54 +0200 Subject: sched/core: Fix picking of tasks for core scheduling with DL server * Use simple CFS pick_task for DL pick_task DL server's pick_task calls CFS's pick_next_task_fair(), this is wrong because core scheduling's pick_task only calls CFS's pick_task() for evaluation / checking of the CFS task (comparing across CPUs), not for actually affirmatively picking the next task. This causes RB tree corruption issues in CFS that were found by syzbot. * Make pick_task_fair clear DL server A DL task pick might set ->dl_server, but it is possible the task will never run (say the other HT has a stop task). If the CFS task is picked in the future directly (say without DL server), ->dl_server will be set. So clear it in pick_task_fair(). This fixes the KASAN issue reported by syzbot in set_next_entity(). (DL refactoring suggestions by Vineeth Pillai). Reported-by: Suleiman Souhlal Signed-off-by: "Joel Fernandes (Google)" Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vineeth Pillai Tested-by: Juri Lelli Link: https://lore.kernel.org/r/b10489ab1f03d23e08e6097acea47442e7d6466f.1716811044.git.bristot@kernel.org --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4edd7e2096fb..2c1b4ee3234f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -686,7 +686,8 @@ struct sched_dl_entity { */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; - dl_server_pick_f server_pick; + dl_server_pick_f server_pick_next; + dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES /* -- cgit v1.2.3 From 02672e609fa93dd5835783830bf18387916a077a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 19 Jul 2024 11:39:30 +0200 Subject: dt-bindings: clock: axg-audio: add earcrx clock ids Add clock IDs for the eARC Rx device found on sm1 SoCs Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240719093934.3985139-2-jbrunet@baylibre.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/axg-audio-clkc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/axg-audio-clkc.h b/include/dt-bindings/clock/axg-audio-clkc.h index 08c82c22fa5f..607f23b83fa7 100644 --- a/include/dt-bindings/clock/axg-audio-clkc.h +++ b/include/dt-bindings/clock/axg-audio-clkc.h @@ -155,5 +155,12 @@ #define AUD_CLKID_SYSCLK_B_DIV 175 #define AUD_CLKID_SYSCLK_A_EN 176 #define AUD_CLKID_SYSCLK_B_EN 177 +#define AUD_CLKID_EARCRX 178 +#define AUD_CLKID_EARCRX_CMDC_SEL 179 +#define AUD_CLKID_EARCRX_CMDC_DIV 180 +#define AUD_CLKID_EARCRX_CMDC 181 +#define AUD_CLKID_EARCRX_DMAC_SEL 182 +#define AUD_CLKID_EARCRX_DMAC_DIV 183 +#define AUD_CLKID_EARCRX_DMAC 184 #endif /* __AXG_AUDIO_CLKC_BINDINGS_H */ -- cgit v1.2.3 From 8cd44dd1d17a23d5cc8c443c659ca57aa76e2fa5 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 15 Feb 2023 09:18:02 +0900 Subject: btrfs: zoned: fix zone_unusable accounting on making block group read-write again When btrfs makes a block 
group read-only, it adds all free regions in the block group to space_info->bytes_readonly. That free space excludes reserved and pinned regions. OTOH, when btrfs makes the block group read-write again, it moves all the unused regions into the block group's zone_unusable. That unused region includes reserved and pinned regions. As a result, it counts too much zone_unusable bytes. Fortunately (or unfortunately), having erroneous zone_unusable does not affect the calculation of space_info->bytes_readonly, because free space (num_bytes in btrfs_dec_block_group_ro) calculation is done based on the erroneous zone_unusable and it reduces the num_bytes just to cancel the error. This behavior can be easily discovered by adding a WARN_ON to check e.g, "bg->pinned > 0" in btrfs_dec_block_group_ro(), and running fstests test case like btrfs/282. Fix it by properly considering pinned and reserved in btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Naohiro Aota Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index eeb56975bee7..de55a555d95b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, -- cgit v1.2.3 From 5297bba507dc54045e6efeb9955c1271ca9aafe1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 23 Jul 2024 15:27:01 +0200 Subject: genirq/msi: Silence 'set affinity failed' warning Various PCI controllers that mux MSIs onto a single IRQ line produce these "IRQ%d: set affinity failed" warnings when entering suspend. This has been discussed before [1] [2] and an example test case is included at the end of this commit message. Controller drivers that create MSI IRQ domain with MSI_FLAG_USE_DEF_CHIP_OPS and do not override the .irq_set_affinity() irqchip callback get assigned the default msi_domain_set_affinity() callback. That is not desired on controllers where it is not possible to set affinity of each MSI IRQ line to a specific CPU core due to hardware limitation. Introduce flag MSI_FLAG_NO_AFFINITY, which keeps .irq_set_affinity() unset if the controller driver did not assign it. This way, migrate_one_irq() can exit right away, without printing the warning. The .irq_set_affinity() implementations which only return -EINVAL can be removed from multiple controller drivers. $ grep 25 /proc/interrupts 25: 0 0 0 0 0 0 0 0 PCIe MSI 0 Edge PCIe PME $ echo core > /sys/power/pm_test ; echo mem > /sys/power/state ... Disabling non-boot CPUs ... IRQ25: set affinity failed(-22). <---------- This is being silenced here psci: CPU7 killed (polled 4 ms) ... 
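For illustration, a controller driver that muxes all MSIs onto a single IRQ line would opt in roughly as below; only the MSI_FLAG_* constants come from the patch and existing msi.h, the driver names are made up:

  static struct irq_chip my_pcie_msi_chip = {
          .name = "my-pcie-msi",
          /* no .irq_set_affinity: vectors cannot be steered individually */
  };

  static struct msi_domain_info my_pcie_msi_domain_info = {
          .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
                   MSI_FLAG_NO_AFFINITY | MSI_FLAG_PCI_MSIX,
          .chip  = &my_pcie_msi_chip,
  };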
[1] https://lore.kernel.org/all/d4a6eea3c5e33a3a4056885419df95a7@kernel.org/ [2] https://lore.kernel.org/all/5f4947b18bf381615a37aa81c2242477@kernel.org/ Link: https://lore.kernel.org/r/20240723132958.41320-2-marek.vasut+renesas@mailbox.org Signed-off-by: Marek Vasut [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Acked-by: Thomas Gleixner --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 944979763825..b10093c4d00e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -554,6 +554,8 @@ enum { MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* PCI MSIs cannot be steered separately to CPU cores */ + MSI_FLAG_NO_AFFINITY = (1 << 21), }; /** -- cgit v1.2.3 From 2accfdb7eff65f390c4308b0e9cb7c3fe48ad63c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Jul 2024 10:58:28 -0700 Subject: profiling: attempt to remove per-cpu profile flip buffer This is the really old legacy kernel profiling code, which has long since been obviated by "real profiling" (ie 'prof' and company), and mainly remains as a source of syzbot reports. There are anecdotal reports that people still use it for boot-time profiling, but it's unlikely that such use would care about the old NUMA optimizations in this code from 2004 (commit ad02973d42: "profile: 512x Altix timer interrupt livelock fix" in the BK import archive at [1]) So in order to head off future syzbot reports, let's try to simplify this code and get rid of the per-cpu profile buffers that are quite a large portion of the complexity footprint of this thing (including CPU hotplug callbacks etc). It's unlikely anybody will actually notice, or possibly, as Thomas put it: "Only people who indulge in nostalgia will notice :)". That said, if it turns out that this code is actually actively used by somebody, we can always revert this removal. Thus the "attempt" in the summary line. [ Note: in a small nod to "the profiling code can cause NUMA problems", this also removes the "increment the last entry in the profiling array on any unknown hits" logic. That would account any program counter in a module to that single counter location, and might exacerbate any NUMA cacheline bouncing issues ] Link: https://lore.kernel.org/all/CAHk-=wgs52BxT4Zjmjz8aNvHWKxf5_ThBY4bYL1Y6CTaNL2dTw@mail.gmail.com/ Link: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git [1] Cc: Thomas Gleixner Cc: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..affdd890899e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, -- cgit v1.2.3 From de79583ffe794663c53b77f97be814522d4edc4f Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 2 Jul 2024 18:02:33 +0200 Subject: iio: core: add accessors 'masklength' 'masklength' is supposed to be an IIO private member. However, drivers (often in trigger handlers) need to access it to iterate over the enabled channels for example (there are other reasons). 
Hence, a couple of new accessors are being added: * iio_for_each_active_channel() - Iterates over the active channels; * iio_get_masklength() - Get length of the channels mask. The goal of these new accessors is to annotate 'masklength' as private as soon as all drivers accessing it are converted to use the new helpers. Signed-off-by: Nuno Sa Reviewed-by: Alexandru Ardelean Link: https://patch.msgid.link/20240702-dev-iio-masklength-private-v1-1-98193bf536a6@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 894309294182..dd6bbc468283 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -855,6 +855,24 @@ static inline const struct iio_scan_type return &chan->scan_type; } +/** + * iio_get_masklength - Get length of the channels mask + * @indio_dev: the IIO device to get the masklength for + */ +static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) +{ + return indio_dev->masklength; +} + +/** + * iio_for_each_active_channel - Iterated over active channels + * @indio_dev: the IIO device + * @chan: Holds the index of the enabled channel + */ +#define iio_for_each_active_channel(indio_dev, chan) \ + for_each_set_bit((chan), (indio_dev)->active_scan_mask, \ + iio_get_masklength(indio_dev)) + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, -- cgit v1.2.3 From 4bf79f9be434e000c8e12fe83b2f4402480f1460 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Jul 2024 13:23:53 -0700 Subject: bpf: Track equal scalars history on per-instruction level Use bpf_verifier_state->jmp_history to track which registers were updated by find_equal_scalars() (renamed to collect_linked_regs()) when conditional jump was verified. Use recorded information in backtrack_insn() to propagate precision. E.g. for the following program: while verifying instructions 1: r1 = r0 | 2: if r1 < 8 goto ... | push r0,r1 as linked registers in jmp_history 3: if r0 > 16 goto ... | push r0,r1 as linked registers in jmp_history 4: r2 = r10 | 5: r2 += r0 v mark_chain_precision(r0) while doing mark_chain_precision(r0) 5: r2 += r0 | mark r0 precise 4: r2 = r10 | 3: if r0 > 16 goto ... | mark r0,r1 as precise 2: if r1 < 8 goto ... | mark r0,r1 as precise 1: r1 = r0 v Technically, do this as follows: - Use 10 bits to identify each register that gains range because of sync_linked_regs(): - 3 bits for frame number; - 6 bits for register or stack slot number; - 1 bit to indicate if register is spilled. - Use u64 as a vector of 6 such records + 4 bits for vector length. - Augment struct bpf_jmp_history_entry with a field 'linked_regs' representing such vector. - When doing check_cond_jmp_op() remember up to 6 registers that gain range because of sync_linked_regs() in such a vector. - Don't propagate range information and reset IDs for registers that don't fit in 6-value vector. - Push a pair {instruction index, linked registers vector} to bpf_verifier_state->jmp_history. - When doing backtrack_insn() check if any of recorded linked registers is currently marked precise, if so mark all linked registers as precise. 
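As an aside, the 10-bit record packing described above can be sketched in plain, standalone C as follows; the exact field order inside the u64 is an assumption made for illustration, not necessarily the kernel's layout:

  #include <stdint.h>
  #include <stdio.h>

  /* one record: 3 bits frame, 6 bits register/stack slot, 1 bit "spilled";
   * a u64 holds up to six such records plus a 4-bit count
   */
  #define LR_REC_BITS  10
  #define LR_CNT_BITS  4
  #define LR_MAX_RECS  6

  static uint64_t linked_regs_push(uint64_t vec, unsigned int frame,
                                   unsigned int spi_or_reg, int is_spill)
  {
          uint64_t cnt = vec & ((1ull << LR_CNT_BITS) - 1);
          uint64_t rec = (frame & 0x7) | ((spi_or_reg & 0x3f) << 3) |
                         ((is_spill ? 1u : 0u) << 9);

          if (cnt >= LR_MAX_RECS)
                  return vec;     /* vector full: registers that don't fit are dropped */

          vec |= rec << (LR_CNT_BITS + cnt * LR_REC_BITS);
          return (vec & ~((1ull << LR_CNT_BITS) - 1)) | (cnt + 1);
  }

  int main(void)
  {
          uint64_t linked_regs = 0;

          /* r0 and r1 of frame 0 gained range from the same conditional jump */
          linked_regs = linked_regs_push(linked_regs, 0, 0, 0);
          linked_regs = linked_regs_push(linked_regs, 0, 1, 0);
          printf("linked_regs = %#llx\n", (unsigned long long)linked_regs);
          return 0;
  }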
This also requires fixes for two test_verifier tests: - precise: test 1 - precise: test 2 Both tests contain the following instruction sequence: 19: (bf) r2 = r9 ; R2=scalar(id=3) R9=scalar(id=3) 20: (a5) if r2 < 0x8 goto pc+1 ; R2=scalar(id=3,umin=8) 21: (95) exit 22: (07) r2 += 1 ; R2_w=scalar(id=3+1,...) 23: (bf) r1 = r10 ; R1_w=fp0 R10=fp0 24: (07) r1 += -8 ; R1_w=fp-8 25: (b7) r3 = 0 ; R3_w=0 26: (85) call bpf_probe_read_kernel#113 The call to bpf_probe_read_kernel() at (26) forces r2 to be precise. Previously, this forced all registers with the same id to become precise immediately when mark_chain_precision() is called. After this change, the precision is propagated to registers sharing the same id only when the 'if' instruction is backtracked. Hence the verification log for both tests is changed: regs=r2,r9 -> regs=r2 for instructions 25..20. Fixes: 904e6ddf4133 ("bpf: Use scalar ids in mark_chain_precision()") Reported-by: Hao Sun Suggested-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240718202357.1746514-2-eddyz87@gmail.com Closes: https://lore.kernel.org/bpf/CAEf4BzZ0xidVCqB47XnkXcNhkPWF6_nTV7yt+_Lf0kcFEut2Mg@mail.gmail.com/ --- include/linux/bpf_verifier.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a3..731a0a4ac13c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -371,6 +371,10 @@ struct bpf_jmp_history_entry { u32 prev_idx : 22; /* special flags, e.g., whether insn is doing register stack spill/load */ u32 flags : 10; + /* additional registers that need precision tracking when this + * jump is backtracked, vector of six 10-bit records + */ + u64 linked_regs; }; /* Maximum number of register states that can exist at once */ -- cgit v1.2.3 From e42ac14180554fa23a3312d4f921dc4ea7972fb7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 22 Jul 2024 11:30:45 -0700 Subject: bpf: Check unsupported ops from the bpf_struct_ops's cfi_stubs The bpf_tcp_ca struct_ops currently uses a "u32 unsupported_ops[]" array to track which ops are not supported. After cfi_stubs had been added, the function pointer in cfi_stubs is also NULL for the unsupported ops. Thus, the "u32 unsupported_ops[]" becomes redundant. This observation was originally brought up in the bpf/cfi discussion: https://lore.kernel.org/bpf/CAADnVQJoEkdjyCEJRPASjBw1QGsKYrF33QdMGc1RZa9b88bAEA@mail.gmail.com/ The recent bpf qdisc patch (https://lore.kernel.org/bpf/20240714175130.4051012-6-amery.hung@bytedance.com/) also needs to specify quite a few unsupported ops. It is a good time to clean it up. This patch removes the need for "u32 unsupported_ops[]" and tests for null-ness in the cfi_stubs instead. Testing the cfi_stubs is done in a new function bpf_struct_ops_supported(). The verifier will call bpf_struct_ops_supported() when loading the struct_ops program. The ".check_member" is removed from the bpf_tcp_ca in this patch. ".check_member" could still be useful for other subsystems to enforce other restrictions (e.g. sched_ext checks for prog->sleepable). To keep the same error return, ENOTSUPP is used.
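Conceptually, the check is little more than a NULL test on the stub at the member's offset; a minimal sketch of the idea (not necessarily the exact kernel code, hence the illustrative name) looks like:

  static int struct_ops_op_supported(const struct bpf_struct_ops *st_ops, u32 moff)
  {
          void *stub = *(void **)(st_ops->cfi_stubs + moff);

          return stub ? 0 : -ENOTSUPP;
  }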
Cc: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240722183049.2254692-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3b94ec161e8c..4c54864316ee 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1795,6 +1795,7 @@ struct bpf_struct_ops_common_value { #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, @@ -1851,6 +1852,10 @@ static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } +static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) +{ + return -ENOTSUPP; +} static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) -- cgit v1.2.3 From 52dea0a15cc888e89f0144dca7b712fb56d12a28 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:28 +0200 Subject: posix-timers: Convert timer list to hlist No requirement for a real list. Spare a few bytes. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/posix-timers.h | 2 +- include/linux/sched/signal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dc7b738de299..453691710839 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -158,7 +158,7 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct list_head list; + struct hlist_node list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0a0e23c45406..622fdef78e14 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -137,7 +137,7 @@ struct signal_struct { /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; - struct list_head posix_timers; + struct hlist_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; -- cgit v1.2.3 From a2b80ce87a87fc18c594e74d13031d5e347b69cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:33 +0200 Subject: signal: Remove task argument from dequeue_signal() The task pointer which is handed to dequeue_signal() is always current. The argument along with the first comment about signalfd in that function is confusing at best. Remove it and use current internally. Update the stale comment for dequeue_signal() while at it. 
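A hypothetical caller only drops the task argument, as the kernel_dequeue_signal() hunk below also shows:

  kernel_siginfo_t info;
  enum pid_type type;
  int signr;

  spin_lock_irq(&current->sighand->siglock);
  /* before: signr = dequeue_signal(current, &current->blocked, &info, &type); */
  signr = dequeue_signal(&current->blocked, &info, &type);
  spin_unlock_irq(&current->sighand->siglock);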
Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) --- include/linux/sched/signal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 622fdef78e14..c8ed09ac29ac 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -276,8 +276,7 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type); +extern int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { @@ -287,7 +286,7 @@ static inline int kernel_dequeue_signal(void) int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, &task->blocked, &__info, &__type); + ret = dequeue_signal(&task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; -- cgit v1.2.3 From 5d99e198be279045e6ecefe220f5c52f8ce9bfd5 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:52 +0800 Subject: bpf, lsm: Add check for BPF LSM return value A bpf prog returning a positive number attached to file_alloc_security hook makes kernel panic. This happens because file system can not filter out the positive number returned by the LSM prog using IS_ERR, and misinterprets this positive number as a file pointer. Given that hook file_alloc_security never returned positive number before the introduction of BPF LSM, and other BPF LSM hooks may encounter similar issues, this patch adds LSM return value check in verifier, to ensure no unexpected value is returned. Fixes: 520b7aa00d8c ("bpf: lsm: Initialize the BPF LSM hooks") Reported-by: Xin Liu Signed-off-by: Xu Kuohai Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240719110059.797546-3-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + include/linux/bpf_lsm.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4c54864316ee..5739cc9986f8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -927,6 +927,7 @@ struct bpf_insn_access_aux { }; }; struct bpf_verifier_log *log; /* for verbose logs */ + bool is_retval; /* is accessing function return value ? 
*/ }; static inline void diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 1de7ece5d36d..aefcd6564251 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -9,6 +9,7 @@ #include #include +#include #include #ifdef CONFIG_BPF_LSM @@ -45,6 +46,8 @@ void bpf_inode_storage_free(struct inode *inode); void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); +int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range); #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -78,6 +81,11 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, { } +static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 28ead3eaabc16ecc907cfb71876da028080f6356 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:53 +0800 Subject: bpf: Prevent tail call between progs attached to different hooks bpf progs can be attached to kernel functions, and the attached functions can take different parameters or return different return values. If prog attached to one kernel function tail calls prog attached to another kernel function, the ctx access or return value verification could be bypassed. For example, if prog1 is attached to func1 which takes only 1 parameter and prog2 is attached to func2 which takes two parameters. Since verifier assumes the bpf ctx passed to prog2 is constructed based on func2's prototype, verifier allows prog2 to access the second parameter from the bpf ctx passed to it. The problem is that verifier does not prevent prog1 from passing its bpf ctx to prog2 via tail call. In this case, the bpf ctx passed to prog2 is constructed from func1 instead of func2, that is, the assumption for ctx access verification is bypassed. Another example, if BPF LSM prog1 is attached to hook file_alloc_security, and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. Verifier knows the return value rules for these two hooks, e.g. it is legal for bpf_lsm_audit_rule_known to return positive number 1, and it is illegal for file_alloc_security to return positive number. So verifier allows prog2 to return positive number 1, but does not allow prog1 to return positive number. The problem is that verifier does not prevent prog1 from calling prog2 via tail call. In this case, prog2's return value 1 will be used as the return value for prog1's hook file_alloc_security. That is, the return value rule is bypassed. This patch adds restriction for tail call to prevent such bypasses. Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5739cc9986f8..f16d0753f518 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -294,6 +294,7 @@ struct bpf_map { * same prog type, JITed flag and xdp_has_frags flag. 
*/ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; -- cgit v1.2.3 From 2aff9d20d50ac45dd13a013ef5231f4fb8912356 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:25 -0700 Subject: lsm: infrastructure management of the sock security Move management of the sock->sk_security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Acked-by: Paul Moore Reviewed-by: Kees Cook Reviewed-by: John Johansen Acked-by: Stephen Smalley Signed-off-by: Casey Schaufler [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a2ade0ffe9e7..efd4a0655159 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -73,6 +73,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_sock; int lbs_superblock; int lbs_ipc; int lbs_msg_msg; -- cgit v1.2.3 From 5f8d28f6d7d568dbbc8c5bce94894474c07afd4f Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:26 -0700 Subject: lsm: infrastructure management of the key security blob Move management of the key->security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. There are no existing modules that require a key_free hook, so the call to it and the definition for it have been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 - include/linux/lsm_hooks.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 855db460e08b..8cc60644f3bd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -403,7 +403,6 @@ LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index efd4a0655159..7233bc0737be 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -76,6 +76,7 @@ struct lsm_blob_sizes { int lbs_sock; int lbs_superblock; int lbs_ipc; + int lbs_key; int lbs_msg_msg; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ -- cgit v1.2.3 From a39c0f77dbbe083f3eec6c3b32d90f168f7575eb Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:28 -0700 Subject: lsm: infrastructure management of the dev_tun blob Move management of the dev_tun security blob out of the individual security modules and into the LSM infrastructure. The security modules tell the infrastructure how much space they require at initialization. There are no longer any modules that require the dev_tun_free hook. The hook definition has been removed. 
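As a sketch of the new model (the "example" names and the sid field are illustrative, not an existing module): the LSM only declares the blob size it needs, and the alloc hook now receives memory the infrastructure has already allocated:

  struct example_tun_security {
          u32 sid;
  };

  struct lsm_blob_sizes example_blob_sizes __ro_after_init = {
          .lbs_tun_dev = sizeof(struct example_tun_security),
  };

  static int example_tun_dev_alloc_security(void *security)
  {
          struct example_tun_security *tsec = security;

          tsec->sid = 0;  /* just initialize; no kmalloc()/kfree() pair needed anymore */
          return 0;
  }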
Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak, selinux style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 +-- include/linux/lsm_hooks.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8cc60644f3bd..22c475f530ae 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -353,8 +353,7 @@ LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, struct flowi_common *flic) -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7233bc0737be..0ff14ff128c8 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -80,6 +80,7 @@ struct lsm_blob_sizes { int lbs_msg_msg; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_tun_dev; }; /** -- cgit v1.2.3 From 66de33a0bbb59ef3909d2c65dbbb7fc503d573bd Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:29 -0700 Subject: lsm: infrastructure management of the infiniband blob Move management of the infiniband security blob out of the individual security modules and into the LSM infrastructure. The security modules tell the infrastructure how much space they require at initialization. There are no longer any modules that require the ib_free() hook. The hook definition has been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak, selinux style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 +-- include/linux/lsm_hooks.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 22c475f530ae..4fcbc5b3f58c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -373,8 +373,7 @@ LSM_HOOK(int, 0, mptcp_add_subflow, struct sock *sk, struct sock *ssk) LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) -LSM_HOOK(int, 0, ib_alloc_security, void **sec) -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0ff14ff128c8..b6fc6ac88723 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -72,6 +72,7 @@ struct security_hook_list { struct lsm_blob_sizes { int lbs_cred; int lbs_file; + int lbs_ib; int lbs_inode; int lbs_sock; int lbs_superblock; -- cgit v1.2.3 From 61a1dcdceb44d79e5ab511295791b88ea178c045 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 10 Jul 2024 14:32:30 -0700 Subject: lsm: infrastructure management of the perf_event security blob Move management of the perf_event->security blob out of the individual security modules and into the security infrastructure. 
Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. There are no longer any modules that require the perf_event_free() hook. The hook definition has been removed. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen [PM: subject tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 - include/linux/lsm_hooks.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 4fcbc5b3f58c..12c81be78eb9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -439,7 +439,6 @@ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index b6fc6ac88723..f1ca8082075a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -79,6 +79,7 @@ struct lsm_blob_sizes { int lbs_ipc; int lbs_key; int lbs_msg_msg; + int lbs_perf_event; int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; -- cgit v1.2.3 From 92de36080c93296ef9005690705cba260b9bd68a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jul 2024 08:34:39 -0700 Subject: bpf: Fail verification for sign-extension of packet data/data_end/data_meta syzbot reported a kernel crash due to commit 1f1e864b6555 ("bpf: Handle sign-extenstin ctx member accesses"). The reason is due to sign-extension of 32-bit load for packet data/data_end/data_meta uapi field. The original code looks like: r2 = *(s32 *)(r1 + 76) /* load __sk_buff->data */ r3 = *(u32 *)(r1 + 80) /* load __sk_buff->data_end */ r0 = r2 r0 += 8 if r3 > r0 goto +1 ... Note that __sk_buff->data load has 32-bit sign extension. After verification and convert_ctx_accesses(), the final asm code looks like: r2 = *(u64 *)(r1 +208) r2 = (s32)r2 r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 ... Note that 'r2 = (s32)r2' may make the kernel __sk_buff->data address invalid which may cause runtime failure. Currently, in C code, typically we have void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; ... and it will generate r2 = *(u64 *)(r1 +208) r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 If we allow sign-extension, void *data = (void *)(long)(int)skb->data; void *data_end = (void *)(long)skb->data_end; ... the generated code looks like r2 = *(u64 *)(r1 +208) r2 <<= 32 r2 s>>= 32 r3 = *(u64 *)(r1 +80) r0 = r2 r0 += 8 if r3 > r0 goto pc+1 and this will cause verification failure since "r2 <<= 32" is not allowed as "r2" is a packet pointer. To fix this issue for case r2 = *(s32 *)(r1 + 76) /* load __sk_buff->data */ this patch added additional checking in is_valid_access() callback function for packet data/data_end/data_meta access. If those accesses are with sign-extenstion, the verification will fail. 
[1] https://lore.kernel.org/bpf/000000000000c90eee061d236d37@google.com/ Reported-by: syzbot+ad9ec60c8eaf69e6f99c@syzkaller.appspotmail.com Fixes: 1f1e864b6555 ("bpf: Handle sign-extenstin ctx member accesses") Acked-by: Eduard Zingerman Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240723153439.2429035-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f16d0753f518..f560ea0c2b36 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -920,6 +920,7 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; + bool is_ldsx; union { int ctx_field_size; struct { -- cgit v1.2.3 From 5b5f51bff1b66cedb62b5ba74a1878341204e057 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 22 Jul 2024 16:38:36 -0700 Subject: bpf: no_caller_saved_registers attribute for helper calls GCC and LLVM define a no_caller_saved_registers function attribute. This attribute means that function scratches only some of the caller saved registers defined by ABI. For BPF the set of such registers could be defined as follows: - R0 is scratched only if function is non-void; - R1-R5 are scratched only if corresponding parameter type is defined in the function prototype. This commit introduces flag bpf_func_prot->allow_nocsr. If this flag is set for some helper function, verifier assumes that it follows no_caller_saved_registers calling convention. The contract between kernel and clang allows to simultaneously use such functions and maintain backwards compatibility with old kernels that don't understand no_caller_saved_registers calls (nocsr for short): - clang generates a simple pattern for nocsr calls, e.g.: r1 = 1; r2 = 2; *(u64 *)(r10 - 8) = r1; *(u64 *)(r10 - 16) = r2; call %[to_be_inlined] r2 = *(u64 *)(r10 - 16); r1 = *(u64 *)(r10 - 8); r0 = r1; r0 += r2; exit; - kernel removes unnecessary spills and fills, if called function is inlined by verifier or current JIT (with assumption that patch inserted by verifier or JIT honors nocsr contract, e.g. does not scratch r3-r5 for the example above), e.g. the code above would be transformed to: r1 = 1; r2 = 2; call %[to_be_inlined] r0 = r1; r0 += r2; exit; Technically, the transformation is split into the following phases: - function mark_nocsr_patterns(), called from bpf_check() searches and marks potential patterns in instruction auxiliary data; - upon stack read or write access, function check_nocsr_stack_contract() is used to verify if stack offsets, presumably reserved for nocsr patterns, are used only from those patterns; - function remove_nocsr_spills_fills(), called from bpf_check(), applies the rewrite for valid patterns. See comment in mark_nocsr_pattern_for_call() for more details. 
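For a helper, opting into the contract is a single flag in its bpf_func_proto; in this sketch "bpf_example" is a made-up helper used only to show where allow_nocsr sits:

  static const struct bpf_func_proto bpf_example_proto = {
          .func         = bpf_example,
          .gpl_only     = false,
          .ret_type     = RET_INTEGER,
          .arg1_type    = ARG_ANYTHING,
          .allow_nocsr  = true,  /* scratches only r0 and r1, per the contract above */
  };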
Suggested-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240722233844.1406874-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_verifier.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f560ea0c2b36..b9425e410bcb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -808,6 +808,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; + /* set to true if helper follows contract for gcc/llvm + * attribute no_caller_saved_registers: + * - void functions do not scratch r0 + * - functions taking N arguments scratch only registers r1-rN + */ + bool allow_nocsr; enum bpf_return_type ret_type; union { struct { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 731a0a4ac13c..5cea15c81b8a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -576,6 +576,14 @@ struct bpf_insn_aux_data { bool is_iter_next; /* bpf_iter__next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ + /* true if STX or LDX instruction is a part of a spill/fill + * pattern for a no_caller_saved_registers call. + */ + u8 nocsr_pattern:1; + /* for CALL instructions, a number of spill/fill pairs in the + * no_caller_saved_registers pattern. + */ + u8 nocsr_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -645,6 +653,10 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; + /* offsets in range [stack_depth .. nocsr_stack_off) + * are used for no_caller_saved_registers spills and fills. + */ + s16 nocsr_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -652,6 +664,8 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; + /* true if nocsr stack region is used by functions that can't be inlined */ + bool keep_nocsr_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; -- cgit v1.2.3 From 7ebd8c5acad5f8ca41f37b36dc62570e1fa13d8b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 18 Jul 2024 16:59:06 +0900 Subject: ata: libata: Use QUIRK instead of HORKAGE According to Wiktionary, the verb "hork" is computing slang defined as "To foul up; to be occupied with difficulty, tangle, or unpleasantness; to be broken" (https://en.wiktionary.org/wiki/hork#Verb). libata uses this with the term "horkage" to refer to broken device features. Given that this term is not widely used and its meaning unknown to many, rename it to the more commonly used term "quirk", similar to many other places in the kernel. The renaming done is: 1) Rename all ATA_HORKAGE_XXX flags to ATA_QUIRK_XXX 2) Rename struct ata_device horkage field to quirks 3) Rename struct ata_blacklist_entry to struct ata_dev_quirks_entry. The array of these structures defining quirks for known devices is renamed __ata_dev_quirks. 4) The functions ata_dev_blacklisted() and ata_force_horkage() are renamed to ata_dev_quirks() and ata_force_quirks() respectively. 
5) All the force_horkage_xxx() macros are renamed to force_quirk_xxx() And while at it, make sure that the type "unsigned int" is used consistantly for quirk flags variables and data structure fields. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv --- include/linux/libata.h | 71 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 17394098bee9..05dd7038ab30 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -362,40 +362,41 @@ enum { */ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, - /* Horkage types. May be set by libata or controller on drives - (some horkage may be drive/controller pair dependent */ - - ATA_HORKAGE_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ - ATA_HORKAGE_NODMA = (1 << 1), /* DMA problems */ - ATA_HORKAGE_NONCQ = (1 << 2), /* Don't use NCQ */ - ATA_HORKAGE_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ - ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ - ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ - ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ - ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ - ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ - ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands - not multiple of 16 bytes */ - ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ - ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ - ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ - ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ - ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ - ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ - ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ - ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ - ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ - ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ - ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ - ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ - ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ - ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ - ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ - ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ - ATA_HORKAGE_NO_FUA = (1 << 30), /* Do not use FUA */ + /* + * Quirk flags: may be set by libata or controller drivers on drives. + * Some quirks may be drive/controller pair dependent. 
+ */ + ATA_QUIRK_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ + ATA_QUIRK_NODMA = (1 << 1), /* DMA problems */ + ATA_QUIRK_NONCQ = (1 << 2), /* Do not use NCQ */ + ATA_QUIRK_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ + ATA_QUIRK_BROKEN_HPA = (1 << 4), /* Broken HPA */ + ATA_QUIRK_DISABLE = (1 << 5), /* Disable it */ + ATA_QUIRK_HPA_SIZE = (1 << 6), /* Native size off by one */ + ATA_QUIRK_IVB = (1 << 8), /* CBL det validity bit bugs */ + ATA_QUIRK_STUCK_ERR = (1 << 9), /* Stuck ERR on next PACKET */ + ATA_QUIRK_BRIDGE_OK = (1 << 10), /* No bridge limits */ + ATA_QUIRK_ATAPI_MOD16_DMA = (1 << 11), /* Use ATAPI DMA for commands */ + /* not multiple of 16 bytes */ + ATA_QUIRK_FIRMWARE_WARN = (1 << 12), /* Firmware update warning */ + ATA_QUIRK_1_5_GBPS = (1 << 13), /* Force 1.5 Gbps */ + ATA_QUIRK_NOSETXFER = (1 << 14), /* Skip SETXFER, SATA only */ + ATA_QUIRK_BROKEN_FPDMA_AA = (1 << 15), /* Skip AA */ + ATA_QUIRK_DUMP_ID = (1 << 16), /* Dump IDENTIFY data */ + ATA_QUIRK_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ + ATA_QUIRK_ATAPI_DMADIR = (1 << 18), /* Device requires dmadir */ + ATA_QUIRK_NO_NCQ_TRIM = (1 << 19), /* Do not use queued TRIM */ + ATA_QUIRK_NOLPM = (1 << 20), /* Do not use LPM */ + ATA_QUIRK_WD_BROKEN_LPM = (1 << 21), /* Some WDs have broken LPM */ + ATA_QUIRK_ZERO_AFTER_TRIM = (1 << 22), /* Guarantees zero after trim */ + ATA_QUIRK_NO_DMA_LOG = (1 << 23), /* Do not use DMA for log read */ + ATA_QUIRK_NOTRIM = (1 << 24), /* Do not use TRIM */ + ATA_QUIRK_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_QUIRK_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ + ATA_QUIRK_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ + ATA_QUIRK_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_QUIRK_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ + ATA_QUIRK_NO_FUA = (1 << 30), /* Do not use FUA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -663,7 +664,7 @@ struct ata_cpr_log { struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned int horkage; /* List of broken features */ + unsigned int quirks; /* List of broken features */ unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ void *private_data; -- cgit v1.2.3 From 58157d607aecb4e05ab793408038b014c84e466f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 18 Jul 2024 16:54:03 +0900 Subject: ata: libata: Print quirks applied to devices Introduce the function ata_dev_print_quirks() to print the quirk flags that will be applied to a scanned device. This new function is called from ata_dev_quirks() when a match on a device model or device model and revision is found for a device in the __ata_dev_quirks array. To implement this function, the ATA_QUIRK_ flags are redefined using the new enum ata_quirk which defines the bit shift for each quirk flag. The array of strings ata_quirk_names is used to define the name of each flag, which are printed by ata_dev_print_quirks(). Example output for a device listed in the __ata_dev_quirks array and which has the ATA_QUIRK_DISABLE flag applied: [10193.461270] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300) [10193.469190] ata1.00: Model 'ASMT109x- Config', rev '2143 5', applying quirks: disable [10193.469195] ata1.00: unsupported device, disabling [10193.481564] ata1.00: disable device enum ata_quirk also defines the __ATA_QUIRK_MAX value as one plus the last quirk flag defined. 
This value is used in ata_dev_quirks() to add a build time check that all quirk flags fit within the unsigned int (32-bits) quirks field of struct ata_device. Signed-off-by: Damien Le Moal Reviewed-by: Igor Pylypiv Reviewed-by: Niklas Cassel --- include/linux/libata.h | 106 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 05dd7038ab30..d598ef690e50 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -55,6 +55,46 @@ /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU +/* + * Quirk flags bits. + * ata_device->quirks is an unsigned int, so __ATA_QUIRK_MAX must not exceed 32. + */ +enum ata_quirks { + __ATA_QUIRK_DIAGNOSTIC, /* Failed boot diag */ + __ATA_QUIRK_NODMA, /* DMA problems */ + __ATA_QUIRK_NONCQ, /* Don't use NCQ */ + __ATA_QUIRK_MAX_SEC_128, /* Limit max sects to 128 */ + __ATA_QUIRK_BROKEN_HPA, /* Broken HPA */ + __ATA_QUIRK_DISABLE, /* Disable it */ + __ATA_QUIRK_HPA_SIZE, /* Native size off by one */ + __ATA_QUIRK_IVB, /* cbl det validity bit bugs */ + __ATA_QUIRK_STUCK_ERR, /* Stuck ERR on next PACKET */ + __ATA_QUIRK_BRIDGE_OK, /* No bridge limits */ + __ATA_QUIRK_ATAPI_MOD16_DMA, /* Use ATAPI DMA for commands that */ + /* are not a multiple of 16 bytes */ + __ATA_QUIRK_FIRMWARE_WARN, /* Firmware update warning */ + __ATA_QUIRK_1_5_GBPS, /* Force 1.5 Gbps */ + __ATA_QUIRK_NOSETXFER, /* Skip SETXFER, SATA only */ + __ATA_QUIRK_BROKEN_FPDMA_AA, /* Skip AA */ + __ATA_QUIRK_DUMP_ID, /* Dump IDENTIFY data */ + __ATA_QUIRK_MAX_SEC_LBA48, /* Set max sects to 65535 */ + __ATA_QUIRK_ATAPI_DMADIR, /* Device requires dmadir */ + __ATA_QUIRK_NO_NCQ_TRIM, /* Do not use queued TRIM */ + __ATA_QUIRK_NOLPM, /* Do not use LPM */ + __ATA_QUIRK_WD_BROKEN_LPM, /* Some WDs have broken LPM */ + __ATA_QUIRK_ZERO_AFTER_TRIM, /* Guarantees zero after trim */ + __ATA_QUIRK_NO_DMA_LOG, /* Do not use DMA for log read */ + __ATA_QUIRK_NOTRIM, /* Do not use TRIM */ + __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ + __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ + __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ + __ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */ + __ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */ + __ATA_QUIRK_NO_FUA, /* Do not use FUA */ + + __ATA_QUIRK_MAX, +}; + enum { /* various global constants */ LIBATA_MAX_PRD = ATA_MAX_PRD / 2, @@ -366,40 +406,38 @@ enum { * Quirk flags: may be set by libata or controller drivers on drives. * Some quirks may be drive/controller pair dependent. 
*/ - ATA_QUIRK_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ - ATA_QUIRK_NODMA = (1 << 1), /* DMA problems */ - ATA_QUIRK_NONCQ = (1 << 2), /* Do not use NCQ */ - ATA_QUIRK_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ - ATA_QUIRK_BROKEN_HPA = (1 << 4), /* Broken HPA */ - ATA_QUIRK_DISABLE = (1 << 5), /* Disable it */ - ATA_QUIRK_HPA_SIZE = (1 << 6), /* Native size off by one */ - ATA_QUIRK_IVB = (1 << 8), /* CBL det validity bit bugs */ - ATA_QUIRK_STUCK_ERR = (1 << 9), /* Stuck ERR on next PACKET */ - ATA_QUIRK_BRIDGE_OK = (1 << 10), /* No bridge limits */ - ATA_QUIRK_ATAPI_MOD16_DMA = (1 << 11), /* Use ATAPI DMA for commands */ - /* not multiple of 16 bytes */ - ATA_QUIRK_FIRMWARE_WARN = (1 << 12), /* Firmware update warning */ - ATA_QUIRK_1_5_GBPS = (1 << 13), /* Force 1.5 Gbps */ - ATA_QUIRK_NOSETXFER = (1 << 14), /* Skip SETXFER, SATA only */ - ATA_QUIRK_BROKEN_FPDMA_AA = (1 << 15), /* Skip AA */ - ATA_QUIRK_DUMP_ID = (1 << 16), /* Dump IDENTIFY data */ - ATA_QUIRK_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ - ATA_QUIRK_ATAPI_DMADIR = (1 << 18), /* Device requires dmadir */ - ATA_QUIRK_NO_NCQ_TRIM = (1 << 19), /* Do not use queued TRIM */ - ATA_QUIRK_NOLPM = (1 << 20), /* Do not use LPM */ - ATA_QUIRK_WD_BROKEN_LPM = (1 << 21), /* Some WDs have broken LPM */ - ATA_QUIRK_ZERO_AFTER_TRIM = (1 << 22), /* Guarantees zero after trim */ - ATA_QUIRK_NO_DMA_LOG = (1 << 23), /* Do not use DMA for log read */ - ATA_QUIRK_NOTRIM = (1 << 24), /* Do not use TRIM */ - ATA_QUIRK_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_QUIRK_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ - ATA_QUIRK_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ - ATA_QUIRK_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ - ATA_QUIRK_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ - ATA_QUIRK_NO_FUA = (1 << 30), /* Do not use FUA */ - - /* DMA mask for user DMA control: User visible values; DO NOT - renumber */ + ATA_QUIRK_DIAGNOSTIC = (1U << __ATA_QUIRK_DIAGNOSTIC), + ATA_QUIRK_NODMA = (1U << __ATA_QUIRK_NODMA), + ATA_QUIRK_NONCQ = (1U << __ATA_QUIRK_NONCQ), + ATA_QUIRK_MAX_SEC_128 = (1U << __ATA_QUIRK_MAX_SEC_128), + ATA_QUIRK_BROKEN_HPA = (1U << __ATA_QUIRK_BROKEN_HPA), + ATA_QUIRK_DISABLE = (1U << __ATA_QUIRK_DISABLE), + ATA_QUIRK_HPA_SIZE = (1U << __ATA_QUIRK_HPA_SIZE), + ATA_QUIRK_IVB = (1U << __ATA_QUIRK_IVB), + ATA_QUIRK_STUCK_ERR = (1U << __ATA_QUIRK_STUCK_ERR), + ATA_QUIRK_BRIDGE_OK = (1U << __ATA_QUIRK_BRIDGE_OK), + ATA_QUIRK_ATAPI_MOD16_DMA = (1U << __ATA_QUIRK_ATAPI_MOD16_DMA), + ATA_QUIRK_FIRMWARE_WARN = (1U << __ATA_QUIRK_FIRMWARE_WARN), + ATA_QUIRK_1_5_GBPS = (1U << __ATA_QUIRK_1_5_GBPS), + ATA_QUIRK_NOSETXFER = (1U << __ATA_QUIRK_NOSETXFER), + ATA_QUIRK_BROKEN_FPDMA_AA = (1U << __ATA_QUIRK_BROKEN_FPDMA_AA), + ATA_QUIRK_DUMP_ID = (1U << __ATA_QUIRK_DUMP_ID), + ATA_QUIRK_MAX_SEC_LBA48 = (1U << __ATA_QUIRK_MAX_SEC_LBA48), + ATA_QUIRK_ATAPI_DMADIR = (1U << __ATA_QUIRK_ATAPI_DMADIR), + ATA_QUIRK_NO_NCQ_TRIM = (1U << __ATA_QUIRK_NO_NCQ_TRIM), + ATA_QUIRK_NOLPM = (1U << __ATA_QUIRK_NOLPM), + ATA_QUIRK_WD_BROKEN_LPM = (1U << __ATA_QUIRK_WD_BROKEN_LPM), + ATA_QUIRK_ZERO_AFTER_TRIM = (1U << __ATA_QUIRK_ZERO_AFTER_TRIM), + ATA_QUIRK_NO_DMA_LOG = (1U << __ATA_QUIRK_NO_DMA_LOG), + ATA_QUIRK_NOTRIM = (1U << __ATA_QUIRK_NOTRIM), + ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), + ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), + ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_ID_DEV_LOG = (1U 
<< __ATA_QUIRK_NO_ID_DEV_LOG), + ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), + ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), + + /* User visible DMA mask for DMA control. DO NOT renumber. */ ATA_DMA_MASK_ATA = (1 << 0), /* DMA on ATA Disk */ ATA_DMA_MASK_ATAPI = (1 << 1), /* DMA on ATAPI */ ATA_DMA_MASK_CFA = (1 << 2), /* DMA on CF Card */ -- cgit v1.2.3 From 6774e90f3146818c284b805b16ecdc144dda1c60 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:01 -0700 Subject: of: Add test managed wrappers for of_overlay_apply()/of_node_put() Add test managed wrappers for of_overlay_apply() that automatically removes the overlay when the test is finished. This API is intended for use by KUnit tests that test code which relies on 'struct device_node's and of_*() APIs. KUnit tests will call of_overlay_apply_kunit() to load an overlay that's been built into the kernel image. When the test is complete, the overlay will be removed. This has a few benefits: 1) It keeps the tests hermetic because the overlay is removed when the test is complete. Tests won't even be aware that an overlay was loaded in another test. 2) The overlay code can live right next to the unit test that loads it. The overlay and the unit test can be compiled into one kernel module if desired. 3) We can test different device tree configurations by loading different overlays. The overlays can be written for a specific test, and there can be many of them loaded per-test without needing to jam all possible combinations into one DTB. 4) It also allows KUnit to test device tree dependent code on any architecture, not just UML. This allows KUnit tests to test architecture specific device tree code. There are some potential pitfalls though. Test authors need to be careful to not overwrite properties in the live tree. The easiest way to do this is to add and remove nodes with a 'kunit-' prefix, almost guaranteeing that the same node won't be present in the tree loaded at boot. 
Suggested-by: Rob Herring Cc: Rob Herring Cc: Saravana Kannan Reviewed-by: Rob Herring (Arm) Reviewed-by: David Gow Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-3-sboyd@kernel.org --- include/kunit/of.h | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/kunit/of.h (limited to 'include') diff --git a/include/kunit/of.h b/include/kunit/of.h new file mode 100644 index 000000000000..48d4e70c9666 --- /dev/null +++ b/include/kunit/of.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KUNIT_OF_H +#define _KUNIT_OF_H + +#include + +struct device_node; + +#ifdef CONFIG_OF + +void of_node_put_kunit(struct kunit *test, struct device_node *node); + +#else + +static inline +void of_node_put_kunit(struct kunit *test, struct device_node *node) +{ + kunit_skip(test, "requires CONFIG_OF"); +} + +#endif /* !CONFIG_OF */ + +#if defined(CONFIG_OF) && defined(CONFIG_OF_OVERLAY) && defined(CONFIG_OF_EARLY_FLATTREE) + +int of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, + u32 overlay_fdt_size, int *ovcs_id); +#else + +static inline int +of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, + u32 overlay_fdt_size, int *ovcs_id) +{ + kunit_skip(test, "requires CONFIG_OF and CONFIG_OF_OVERLAY and CONFIG_OF_EARLY_FLATTREE for root node"); + return -EINVAL; +} + +#endif + +/** + * __of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() variant + * @test: test context + * @overlay_begin: start address of overlay to apply + * @overlay_end: end address of overlay to apply + * + * This is mostly internal API. See of_overlay_apply_kunit() for the wrapper + * that makes this easier to use. + * + * Similar to of_overlay_fdt_apply(), except the overlay is managed by the test + * case and is automatically removed with of_overlay_remove() after the test + * case concludes. + * + * Return: 0 on success, negative errno on failure + */ +static inline int __of_overlay_apply_kunit(struct kunit *test, + u8 *overlay_begin, + const u8 *overlay_end) +{ + int unused; + + return of_overlay_fdt_apply_kunit(test, overlay_begin, + overlay_end - overlay_begin, + &unused); +} + +/** + * of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() for built-in overlays + * @test: test context + * @overlay_name: name of overlay to apply + * + * This macro is used to apply a device tree overlay built with the + * cmd_dt_S_dtbo rule in scripts/Makefile.lib that has been compiled into the + * kernel image or KUnit test module. The overlay is automatically removed when + * the test is finished. + * + * Unit tests that need device tree nodes should compile an overlay file with + * @overlay_name\.dtbo.o in their Makefile along with their unit test and then + * load the overlay during their test. The @overlay_name matches the filename + * of the overlay without the dtbo filename extension. If CONFIG_OF_OVERLAY is + * not enabled, the @test will be skipped. + * + * In the Makefile + * + * .. code-block:: none + * + * obj-$(CONFIG_OF_OVERLAY_KUNIT_TEST) += overlay_test.o kunit_overlay_test.dtbo.o + * + * In the test + * + * .. 
code-block:: c + * + * static void of_overlay_kunit_of_overlay_apply(struct kunit *test) + * { + * struct device_node *np; + * + * KUNIT_ASSERT_EQ(test, 0, + * of_overlay_apply_kunit(test, kunit_overlay_test)); + * + * np = of_find_node_by_name(NULL, "test-kunit"); + * KUNIT_EXPECT_NOT_ERR_OR_NULL(test, np); + * of_node_put(np); + * } + * + * Return: 0 on success, negative errno on failure. + */ +#define of_overlay_apply_kunit(test, overlay_name) \ +({ \ + extern uint8_t __dtbo_##overlay_name##_begin[]; \ + extern uint8_t __dtbo_##overlay_name##_end[]; \ + \ + __of_overlay_apply_kunit((test), \ + __dtbo_##overlay_name##_begin, \ + __dtbo_##overlay_name##_end); \ +}) + +#endif -- cgit v1.2.3 From 5ac79730324c6f37106ce397586020ffe6e8e234 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:04 -0700 Subject: platform: Add test managed platform_device/driver APIs Introduce KUnit resource wrappers around platform_driver_register(), platform_device_alloc(), and platform_device_add() so that test authors can register platform drivers/devices from their tests and have the drivers/devices automatically be unregistered when the test is done. This makes test setup code simpler when a platform driver or platform device is needed. Add a few test cases at the same time to make sure the APIs work as intended. Cc: Brendan Higgins Reviewed-by: David Gow Cc: Rae Moar Reviewed-by: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-6-sboyd@kernel.org --- include/kunit/platform_device.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/kunit/platform_device.h (limited to 'include') diff --git a/include/kunit/platform_device.h b/include/kunit/platform_device.h new file mode 100644 index 000000000000..0fc0999d2420 --- /dev/null +++ b/include/kunit/platform_device.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KUNIT_PLATFORM_DRIVER_H +#define _KUNIT_PLATFORM_DRIVER_H + +struct kunit; +struct platform_device; +struct platform_driver; + +struct platform_device * +kunit_platform_device_alloc(struct kunit *test, const char *name, int id); +int kunit_platform_device_add(struct kunit *test, struct platform_device *pdev); + +int kunit_platform_device_prepare_wait_for_probe(struct kunit *test, + struct platform_device *pdev, + struct completion *x); + +int kunit_platform_driver_register(struct kunit *test, + struct platform_driver *drv); + +#endif -- cgit v1.2.3 From d690bd11e87adfc684265209a5aabd1f58fa367e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2024 14:05:05 -0700 Subject: clk: Add test managed clk provider/consumer APIs Unit tests are more ergonomic and simpler to understand if they don't have to hoist a bunch of code into the test harness init and exit functions. Add some test managed wrappers for the clk APIs so that clk unit tests can write more code in the actual test and less code in the harness. Only add APIs that are used for now. More wrappers can be added in the future as necessary. 
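[Editorial aside, not part of the patch] A rough sketch of how these test-managed wrappers are meant to be used from a KUnit test, assuming the declarations added below in <kunit/clk.h>. The dummy clock, the empty clk_ops and the test function itself are hypothetical; only the *_kunit() calls correspond to the API this patch adds, and all cleanup (unregistering the clk_hw, putting the consumer handle, disabling/unpreparing) is left to the KUnit resource machinery:

	#include <kunit/clk.h>
	#include <kunit/test.h>
	#include <linux/clk.h>
	#include <linux/clk-provider.h>
	#include <linux/slab.h>

	/* A do-nothing clock: with no ops set it simply accepts prepare/enable. */
	static const struct clk_ops kunit_dummy_clk_ops = { };

	static void clk_kunit_wrappers_example(struct kunit *test)
	{
		static const struct clk_init_data init = {
			.name = "kunit-dummy-clk",
			.ops = &kunit_dummy_clk_ops,
		};
		struct clk_hw *hw;
		struct clk *clk;

		hw = kunit_kzalloc(test, sizeof(*hw), GFP_KERNEL);
		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hw);
		hw->init = &init;

		/* Unregistered automatically when the test finishes. */
		KUNIT_ASSERT_EQ(test, 0, clk_hw_register_kunit(test, NULL, hw));

		/* Consumer handle, put automatically when the test finishes. */
		clk = clk_hw_get_clk_kunit(test, hw, NULL);
		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, clk);

		/* Disabled and unprepared automatically on test exit as well. */
		KUNIT_EXPECT_EQ(test, 0, clk_prepare_enable_kunit(test, clk));
	}

The same pattern applies to the OF variants (of_clk_hw_register_kunit(), of_clk_get_kunit()) when the clock under test is described by a device tree overlay loaded with the of_overlay_apply_kunit() helper introduced earlier.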
Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240718210513.3801024-7-sboyd@kernel.org --- include/kunit/clk.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/kunit/clk.h (limited to 'include') diff --git a/include/kunit/clk.h b/include/kunit/clk.h new file mode 100644 index 000000000000..73bc99cefe7b --- /dev/null +++ b/include/kunit/clk.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CLK_KUNIT_H +#define _CLK_KUNIT_H + +struct clk; +struct clk_hw; +struct device; +struct device_node; +struct kunit; + +struct clk * +clk_get_kunit(struct kunit *test, struct device *dev, const char *con_id); +struct clk * +of_clk_get_kunit(struct kunit *test, struct device_node *np, int index); + +struct clk * +clk_hw_get_clk_kunit(struct kunit *test, struct clk_hw *hw, const char *con_id); +struct clk * +clk_hw_get_clk_prepared_enabled_kunit(struct kunit *test, struct clk_hw *hw, + const char *con_id); + +int clk_prepare_enable_kunit(struct kunit *test, struct clk *clk); + +int clk_hw_register_kunit(struct kunit *test, struct device *dev, struct clk_hw *hw); +int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node, + struct clk_hw *hw); + +#endif -- cgit v1.2.3 From 4e9652003bc39032c3ab79e607aa3d1913c7cf57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 26 Jul 2024 17:28:15 +0200 Subject: ALSA: control: Annotate snd_kcontrol with __counted_by() struct snd_kcontrol contains a flex array of snd_kcontrol_volatile objects at its end, and the array size is stored in count field. This can be annotated gracefully with __counted_by() for catching possible array overflows. One additional change is the order of the count field initialization; The assignment of the count field is moved before assignment of vd[] elements for avoiding false-positive warnings from compilers. Link: https://patch.msgid.link/20240726152840.8629-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/control.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index c1659036c4a7..13511c50825f 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -81,7 +81,7 @@ struct snd_kcontrol { unsigned long private_value; void *private_data; void (*private_free)(struct snd_kcontrol *kcontrol); - struct snd_kcontrol_volatile vd[]; /* volatile data */ + struct snd_kcontrol_volatile vd[] __counted_by(count); /* volatile data */ }; #define snd_kcontrol(n) list_entry(n, struct snd_kcontrol, list) -- cgit v1.2.3 From 0642a3c5cacc0321c755d45ae48f2c84475469a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jul 2024 16:13:14 +0200 Subject: ALSA: ump: Update substream name from assigned FB names We had a nice name scheme in ALSA sequencer UMP binding for each sequencer port referring to each assigned Function Block name, while the legacy rawmidi refers only to the UMP Endpoint name. It's better to align both. This patch moves the UMP Group attribute update functions into the core UMP code from the sequencer binding code, and improve the substream name of the legacy rawmidi. 
Link: https://patch.msgid.link/20240729141315.18253-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 91238dabe307..7f68056acdff 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -13,6 +13,14 @@ struct snd_ump_ops; struct ump_cvt_to_ump; struct snd_seq_ump_ops; +struct snd_ump_group { + int group; /* group index (0-based) */ + unsigned int dir_bits; /* directions */ + bool active; /* activeness */ + bool valid; /* valid group (referred by blocks) */ + char name[64]; /* group name */ +}; + struct snd_ump_endpoint { struct snd_rawmidi core; /* raw UMP access */ @@ -41,6 +49,8 @@ struct snd_ump_endpoint { struct mutex open_mutex; + struct snd_ump_group groups[SNDRV_UMP_MAX_GROUPS]; /* table of groups */ + #if IS_ENABLED(CONFIG_SND_UMP_LEGACY_RAWMIDI) spinlock_t legacy_locks[2]; struct snd_rawmidi *legacy_rmidi; -- cgit v1.2.3 From b6a66e521a2032f7fcba2af5a9bcbaeaa19b7ca3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 27 Jul 2024 12:01:23 +0200 Subject: mptcp: sched: check both directions for backup The 'mptcp_subflow_context' structure has two items related to the backup flags: - 'backup': the subflow has been marked as backup by the other peer - 'request_bkup': the backup flag has been set by the host Before this patch, the scheduler was only looking at the 'backup' flag. That can make sense in some cases, but it looks like that's not what we wanted for the general use, because either the path-manager was setting both of them when sending an MP_PRIO, or the receiver was duplicating the 'backup' flag in the subflow request. Note that the use of these two flags in the path-manager are going to be fixed in the next commits, but this change here is needed not to modify the behaviour. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- include/trace/events/mptcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9..085b749cdd97 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); -- cgit v1.2.3 From 22f5468731491e53356ba7c028f0fdea20b18e2c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jul 2024 10:36:47 -0700 Subject: minmax: improve macro expansion and type checking This clarifies the rules for min()/max()/clamp() type checking and makes them a much more efficient macro expansion. 
In particular, we now look at the type and range of the inputs to see whether they work together, generating a mask of acceptable comparisons, and then just verifying that the inputs have a shared case: - an expression with a signed type can be used for (1) signed comparisons (2) unsigned comparisons if it is statically known to have a non-negative value - an expression with an unsigned type can be used for (3) unsigned comparison (4) signed comparisons if the type is smaller than 'int' and thus the C integer promotion rules will make it signed anyway Here rule (1) and (3) are obvious, and rule (2) is important in order to allow obvious trivial constants to be used together with unsigned values. Rule (4) is not necessarily a good idea, but matches what we used to do, and we have extant cases of this situation in the kernel. Notably with bcachefs having an expression like min(bch2_bucket_sectors_dirty(a), ca->mi.bucket_size) where bch2_bucket_sectors_dirty() returns an 's64', and 'ca->mi.bucket_size' is of type 'u16'. Technically that bcachefs comparison is clearly sensible on a C type level, because the 'u16' will go through the normal C integer promotion, and become 'int', and then we're comparing two signed values and everything looks sane. However, it's not entirely clear that a 'min(s64,u16)' operation makes a lot of conceptual sense, and it's possible that we will remove rule (4). After all, the _reason_ we have these complicated type checks is exactly that the C type promotion rules are not very intuitive. But at least for now the rule is in place for backwards compatibility. Also note that rule (2) existed before, but is hugely relaxed by this commit. It used to be true only for the simplest compile-time non-negative integer constants. The new macro model will allow cases where the compiler can trivially see that an expression is non-negative even if it isn't necessarily a constant. For example, the amdgpu driver does min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F)); because our old 'min()' macro would see that 'pia[addr] & 0x3F' is of type 'int' and clearly not a C constant expression, so doing a 'min()' with a 'size_t' is a signedness violation. Our new 'min()' macro still sees that 'pia[addr] & 0x3F' is of type 'int', but is smart enough to also see that it is clearly non-negative, and thus would allow that case without any complaints. Cc: Arnd Bergmann Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 9 ++++++ include/linux/minmax.h | 74 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b..2df665fa2964 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -296,6 +296,15 @@ static inline void *offset_to_ptr(const int *off) #define is_signed_type(type) (((type)(-1)) < (__force type)1) #define is_unsigned_type(type) (!is_signed_type(type)) +/* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. 
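[Editorial aside, not part of the patch] The practical effect of statically_true() on the reworked minmax.h that follows can be illustrated with a few comparisons; the helper below is hypothetical and only stands in for "any int-valued expression":

	#include <linux/minmax.h>

	static unsigned long minmax_example(unsigned long limit)
	{
		int reg = read_some_register();		/* hypothetical */

		(void)min(sizeof(limit), limit);	/* ok: both operands unsigned */
		(void)min(reg & 0x3f, limit);		/* ok now: masked value is provably non-negative */
		/* min(reg, limit) would still hit the signedness check: 'reg' may be negative */
		/* min(-1, limit)  is still rejected: negative constant vs. unsigned */
		return min(limit, 64UL);
	}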
diff --git a/include/linux/minmax.h b/include/linux/minmax.h index e3e4353df983..41da6f85a407 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,19 +26,63 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) + +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. 
+ */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y, ux, uy) \ - (__is_signed(ux) == __is_signed(uy) || \ - __is_signed((ux) + 0) == __is_signed((uy) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -53,8 +97,8 @@ #define __careful_cmp_once(op, x, y, ux, uy) ({ \ __auto_type ux = (x); __auto_type uy = (y); \ - static_assert(__types_ok(x, y, ux, uy), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ __cmp(op, ux, uy); }) #define __careful_cmp(op, x, y) \ @@ -70,8 +114,8 @@ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(uval, lo, uval, ulo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(uval, hi, uval, uhi), "clamp() 'hi' signedness error"); \ + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ __clamp(uval, ulo, uhi); }) #define __careful_clamp(val, lo, hi) \ -- cgit v1.2.3 From f2881dfdaaa9ec873dbd383ef5512fc31e576cbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 29425d7fdc77..b6fbe4988f2e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1598,10 +1598,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. 
*/ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.2.3 From 1cbe8143fd2f588031a5f157d15ae15ce12215e2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 30 Jul 2024 13:37:33 +0800 Subject: bpf: kprobe: Remove unused declaring of bpf_kprobe_override After the commit 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one"), "bpf_kprobe_override" is not used anywhere anymore, and we can remove it now. Fixes: 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one") Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240730053733.885785-1-dongml2@chinatelecom.cn --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..9435185c10ef 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From c149c4a48b19afbf0c383614e57b452d39b154de Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 13 Jul 2024 08:59:16 +0000 Subject: cgroup/cpuset: Remove cpuset_slab_spread_rotor Since the SLAB implementation was removed in v6.8, so the cpuset_slab_spread_rotor is no longer used and can be removed. 
Signed-off-by: Xiu Jianfeng Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 6 ------ include/linux/sched.h | 1 - 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index de4cf0ee96f7..2a6981eeebf8 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -113,7 +113,6 @@ extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); -extern int cpuset_slab_spread_node(void); static inline int cpuset_do_page_mem_spread(void) { @@ -246,11 +245,6 @@ static inline int cpuset_mem_spread_node(void) return 0; } -static inline int cpuset_slab_spread_node(void) -{ - return 0; -} - static inline int cpuset_do_page_mem_spread(void) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..3773c1c8f099 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1243,7 +1243,6 @@ struct task_struct { /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ -- cgit v1.2.3 From 93c8332c8373fee415bd79f08d5ba4ba7ca5ad15 Mon Sep 17 00:00:00 2001 From: Xavier Date: Thu, 4 Jul 2024 14:24:43 +0800 Subject: Union-Find: add a new module in kernel library This patch implements a union-find data structure in the kernel library, which includes operations for allocating nodes, freeing nodes, finding the root of a node, and merging two nodes. Signed-off-by: Xavier Signed-off-by: Tejun Heo --- include/linux/union_find.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/union_find.h (limited to 'include') diff --git a/include/linux/union_find.h b/include/linux/union_find.h new file mode 100644 index 000000000000..cfd49263c138 --- /dev/null +++ b/include/linux/union_find.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_UNION_FIND_H +#define __LINUX_UNION_FIND_H +/** + * union_find.h - union-find data structure implementation + * + * This header provides functions and structures to implement the union-find + * data structure. The union-find data structure is used to manage disjoint + * sets and supports efficient union and find operations. + * + * See Documentation/core-api/union_find.rst for documentation and samples. + */ + +struct uf_node { + struct uf_node *parent; + unsigned int rank; +}; + +/* This macro is used for static initialization of a union-find node. */ +#define UF_INIT_NODE(node) {.parent = &node, .rank = 0} + +/** + * uf_node_init - Initialize a union-find node + * @node: pointer to the union-find node to be initialized + * + * This function sets the parent of the node to itself and + * initializes its rank to 0. 
+ */ +static inline void uf_node_init(struct uf_node *node) +{ + node->parent = node; + node->rank = 0; +} + +/* find the root of a node */ +struct uf_node *uf_find(struct uf_node *node); + +/* Merge two intersecting nodes */ +void uf_union(struct uf_node *node1, struct uf_node *node2); + +#endif /* __LINUX_UNION_FIND_H */ -- cgit v1.2.3 From 89add40066f9ed9abe5f7f886fe5789ff7e0c50e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Jul 2024 16:10:12 -0400 Subject: net: drop bad gso csum_start and offset in virtio_net_hdr Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb for GSO packets. The function already checks that a checksum requested with VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets this might not hold for segs after segmentation. Syzkaller demonstrated to reach this warning in skb_checksum_help offset = skb_checksum_start_offset(skb); ret = -EINVAL; if (WARN_ON_ONCE(offset >= skb_headlen(skb))) By injecting a TSO packet: WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0 ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774 ip_finish_output_gso net/ipv4/ip_output.c:279 [inline] __ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301 iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4850 [inline] netdev_start_xmit include/linux/netdevice.h:4864 [inline] xmit_one net/core/dev.c:3595 [inline] dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611 __dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261 packet_snd net/packet/af_packet.c:3073 [inline] The geometry of the bad input packet at tcp_gso_segment: [ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0 [ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244 [ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0)) [ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536 ip_summed=3 complete_sw=0 valid=0 level=0) Mitigate with stricter input validation. csum_offset: for GSO packets, deduce the correct value from gso_type. This is already done for USO. Extend it to TSO. Let UFO be: udp[46]_ufo_fragment ignores these fields and always computes the checksum in software. csum_start: finding the real offset requires parsing to the transport header. Do not add a parser, use existing segmentation parsing. Thanks to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded. Again test both TSO and USO. Do not test UFO for the above reason, and do not test UDP tunnel offload. GSO packet are almost always CHECKSUM_PARTIAL. USO packets may be CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit from devices with no checksum offload"), but then still these fields are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no need to test for ip_summed == CHECKSUM_PARTIAL first. This revises an existing fix mentioned in the Fixes tag, which broke small packets with GSO offload, as detected by kselftests. 
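[Editorial aside, not part of the patch] Seen from user space, the tightened checks simply require that a GSO packet written to e.g. a tap fd carries a csum_start pointing at the transport header and a csum_offset matching the protocol implied by gso_type. A hedged sketch for a plain Ethernet/IPv4/TCP frame with no options (offsets are illustrative and __virtio16 endianness handling is omitted):

	#include <stddef.h>
	#include <stdint.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/tcp.h>
	#include <linux/virtio_net.h>

	static void fill_tso4_hdr(struct virtio_net_hdr *h, uint16_t mss)
	{
		h->flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		h->gso_type    = VIRTIO_NET_HDR_GSO_TCPV4;
		h->gso_size    = mss;					/* MSS of each segment */
		h->hdr_len     = ETH_HLEN + sizeof(struct iphdr) +
				 sizeof(struct tcphdr);
		h->csum_start  = ETH_HLEN + sizeof(struct iphdr);	/* transport header */
		h->csum_offset = offsetof(struct tcphdr, check);	/* 16 for TCP */
	}

With that, skb->csum_offset matches offsetof(struct tcphdr, check) as required by the new SKB_GSO_TCPV4/TCPV6 case, and a bogus csum_start on a dodgy GSO packet is caught during segmentation instead of triggering the skb_checksum_help() warning quoted above.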
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871 Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org Fixes: e269d79c7d35 ("net: missing check virtio") Cc: stable@vger.kernel.org Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c3..6c395a2600e8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ -- cgit v1.2.3 From 6dfbafd8a1d51a52a623d912a2219e9982020854 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 Jul 2024 16:08:00 +0200 Subject: soundwire: bus: drop unused driver name field The soundwire driver name field is not currently used by any driver (and even appears to never have been used) so drop it. Signed-off-by: Johan Hovold Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240712140801.24267-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 94fc1b57c57b..5e0dd47a0412 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -704,8 +704,6 @@ struct sdw_master_device { container_of(d, struct sdw_master_device, dev) struct sdw_driver { - const char *name; - int (*probe)(struct sdw_slave *sdw, const struct sdw_device_id *id); int (*remove)(struct sdw_slave *sdw); -- cgit v1.2.3 From ba5c778cab1dd3e4918f940989e771e2818afee8 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 30 Jun 2024 22:33:44 +0100 Subject: media: rc: remove unused tx_resolution field The tx_resolution field is never read. In theory you can imagine this field being useful for detecting whether the transmitter has the resolution for the message you are trying to send, but I am not aware of any hardware where this could be an issue. Just remove. 
Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- include/media/rc-core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 803349599c27..d095908073ef 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -127,7 +127,6 @@ struct lirc_fh { * @min_timeout: minimum timeout supported by device * @max_timeout: maximum timeout supported by device * @rx_resolution : resolution (in us) of input sampler - * @tx_resolution: resolution (in us) of output sampler * @lirc_dev: lirc device * @lirc_cdev: lirc char cdev * @gap_start: start time for gap after timeout if non-zero @@ -194,7 +193,6 @@ struct rc_dev { u32 min_timeout; u32 max_timeout; u32 rx_resolution; - u32 tx_resolution; #ifdef CONFIG_LIRC struct device lirc_dev; struct cdev lirc_cdev; -- cgit v1.2.3 From a755947e050b8751fc5402609a7d600e9c756fa7 Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Mon, 15 Jul 2024 21:55:13 +0530 Subject: drm/dp: Describe target_rr_divider in struct drm_dp_as_sdp Describe newly added parameter target_rr_divider in struct drm_dp_as_sdp. -v2: Remove extra line from commit message.(Lucas) -v3: Rebase. Fixes: a20c6d954d75 ("drm/dp: Add refresh rate divider to struct representing AS SDP") Cc: Mitul Golani Cc: Arun R Murthy Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Jani Nikula Cc: Stephen Rothwell Signed-off-by: Mitul Golani Reviewed-by: Ankit Nautiyal Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240715162514.2836421-1-mitulkumar.ajitkumar.golani@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index bbb1cdc4fc68..279624833ea9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -112,6 +112,7 @@ struct drm_dp_vsc_sdp { * @target_rr: Target Refresh * @duration_incr_ms: Successive frame duration increase * @duration_decr_ms: Successive frame duration decrease + * @target_rr_divider: Target refresh rate divider * @mode: Adaptive Sync Operation Mode */ struct drm_dp_as_sdp { -- cgit v1.2.3 From d579b04a52a183db47dfcb7a44304d7747d551e1 Mon Sep 17 00:00:00 2001 From: Yu-Ting Tseng Date: Tue, 9 Jul 2024 00:00:47 -0700 Subject: binder: frozen notification Frozen processes present a significant challenge in binder transactions. When a process is frozen, it cannot, by design, accept and/or respond to binder transactions. As a result, the sender needs to adjust its behavior, such as postponing transactions until the peer process unfreezes. However, there is currently no way to subscribe to these state change events, making it impossible to implement frozen-aware behaviors efficiently. Introduce a binder API for subscribing to frozen state change events. This allows programs to react to changes in peer process state, mitigating issues related to binder transactions sent to frozen processes. Implementation details: For a given binder_ref, the state of frozen notification can be one of the followings: 1. Userspace doesn't want a notification. binder_ref->freeze is null. 2. Userspace wants a notification but none is in flight. list_empty(&binder_ref->freeze->work.entry) = true 3. A notification is in flight and waiting to be read by userspace. binder_ref_freeze.sent is false. 4. A notification was read by userspace and kernel is waiting for an ack. 
binder_ref_freeze.sent is true. When a notification is in flight, new state change events are coalesced into the existing binder_ref_freeze struct. If userspace hasn't picked up the notification yet, the driver simply rewrites the state. Otherwise, the notification is flagged as requiring a resend, which will be performed once userspace acks the original notification that's inflight. See https://r.android.com/3070045 for how userspace is going to use this feature. Signed-off-by: Yu-Ting Tseng Acked-by: Carlos Llamas Link: https://lore.kernel.org/r/20240709070047.4055369-4-yutingtseng@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index d44a8118b2ed..1fd92021a573 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -236,6 +236,12 @@ struct binder_frozen_status_info { __u32 async_recv; }; +struct binder_frozen_state_info { + binder_uintptr_t cookie; + __u32 is_frozen; + __u32 reserved; +}; + /* struct binder_extened_error - extended error information * @id: identifier for the failed operation * @command: command as defined by binder_driver_return_protocol @@ -467,6 +473,17 @@ enum binder_driver_return_protocol { /* * The target of the last async transaction is frozen. No parameters. */ + + BR_FROZEN_BINDER = _IOR('r', 21, struct binder_frozen_state_info), + /* + * The cookie and a boolean (is_frozen) that indicates whether the process + * transitioned into a frozen or an unfrozen state. + */ + + BR_CLEAR_FREEZE_NOTIFICATION_DONE = _IOR('r', 22, binder_uintptr_t), + /* + * void *: cookie + */ }; enum binder_driver_command_protocol { @@ -550,6 +567,25 @@ enum binder_driver_command_protocol { /* * binder_transaction_data_sg: the sent command. */ + + BC_REQUEST_FREEZE_NOTIFICATION = + _IOW('c', 19, struct binder_handle_cookie), + /* + * int: handle + * void *: cookie + */ + + BC_CLEAR_FREEZE_NOTIFICATION = _IOW('c', 20, + struct binder_handle_cookie), + /* + * int: handle + * void *: cookie + */ + + BC_FREEZE_NOTIFICATION_DONE = _IOW('c', 21, binder_uintptr_t), + /* + * void *: cookie + */ }; #endif /* _UAPI_LINUX_BINDER_H */ -- cgit v1.2.3 From 990c304930138dcd7a49763417e6e5313b81293e Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Mon, 29 Jul 2024 15:00:59 -0700 Subject: Add support for PIO p flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit draft-ietf-6man-pio-pflag is adding a new flag to the Prefix Information Option to signal that the network can allocate a unique IPv6 prefix per client via DHCPv6-PD (see draft-ietf-v6ops-dhcp-pd-per-device). When ra_honor_pio_pflag is enabled, the presence of a P-flag causes SLAAC autoconfiguration to be disabled for that particular PIO. An automated test has been added in Android (r.android.com/3195335) to go along with this change. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: David Lamparter Cc: Simon Horman Signed-off-by: Patrick Rohr Reviewed-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 383a0ea2ab91..a6e2aadbb91b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -89,6 +89,7 @@ struct ipv6_devconf { __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; __u8 ra_honor_pio_life; + __u8 ra_honor_pio_pflag; struct ctl_table_header *sysctl_header; }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 62a407db1bf5..b18e81f0c9e1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -37,10 +37,14 @@ struct prefix_info { struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, - autoconf : 1, - reserved : 6; + autoconf : 1, + routeraddr : 1, + preferpd : 1, + reserved : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 4, + preferpd : 1, + routeraddr : 1, autoconf : 1, onlink : 1; #else -- cgit v1.2.3 From e6ce8a28c768dbbad3f818db286cd0f4c7a921a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 31 Jul 2024 15:05:22 +0200 Subject: ALSA: ump: Transmit RPN/NRPN message at each MSB/LSB data reception The UMP 1.1 spec says that an RPN/NRPN should be sent when one of the following occurs: * a CC 38 is received * a subsequent CC 6 is received * a CC 98, 99, 100, and 101 is received, indicating the last RPN/NRPN message has ended and a new one has started That said, we should send a partial data even if it's not fully filled. Let's change the UMP conversion helper code to follow that rule. Link: https://patch.msgid.link/20240731130528.12600-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_convert.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245..d099ae27f849 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ -- cgit v1.2.3 From 01ce1bf22adc0d09d906319787091ce784cb9914 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Tue, 23 Jul 2024 11:33:10 -0500 Subject: dt-bindings: clock: exynos850: Add TMU clock Add a constant for TMU PCLK clock. It acts simultaneously as an interface clock (to access TMU registers) and an operating clock which makes TMU IP-core functional. 
Signed-off-by: Sam Protsenko Link: https://lore.kernel.org/r/20240723163311.28654-1-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos850.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h index 7666241520f8..80dacda57229 100644 --- a/include/dt-bindings/clock/exynos850.h +++ b/include/dt-bindings/clock/exynos850.h @@ -358,6 +358,7 @@ #define CLK_GOUT_UART_PCLK 32 #define CLK_GOUT_WDT0_PCLK 33 #define CLK_GOUT_WDT1_PCLK 34 +#define CLK_GOUT_BUSIF_TMU_PCLK 35 /* CMU_CORE */ #define CLK_MOUT_CORE_BUS_USER 1 -- cgit v1.2.3 From dbaeef363ea54f4c18112874b77503c72ba60fec Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:51 +0300 Subject: drm/dp_mst: Add a helper to queue a topology probe A follow up i915 patch will need to reprobe the MST topology after the initial probing, add a helper for this. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-3-imre.deak@intel.com --- include/drm/display/drm_dp_mst_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..02b037d3a93f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -885,6 +885,8 @@ int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr); void drm_dp_mst_dump_topology(struct seq_file *m, struct drm_dp_mst_topology_mgr *mgr); +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr); + void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr); int __must_check drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, -- cgit v1.2.3 From 9aed3b51fd6186582a95abd9fa67782982540749 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 2 Jul 2024 18:49:35 -0700 Subject: rcu: Better define "atomic" for list replacement The kernel-doc headers for list_replace_rcu() and hlist_replace_rcu() claim that the replacement is atomic, which it is, but only for readers. Avoid confusion by making it clear that the atomic nature of these functions applies only to readers, not to concurrent updaters. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rculist.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3dc1e58865f7..14dfa6008467 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -191,7 +191,10 @@ static inline void hlist_del_init_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. + * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. + * * Note: @old should not be empty. */ static inline void list_replace_rcu(struct list_head *old, @@ -519,7 +522,9 @@ static inline void hlist_del_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. 
+ * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. */ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *new) -- cgit v1.2.3 From 21b136cc63d2a9ddd60d4699552b69c214b32964 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jul 2024 15:44:16 -0700 Subject: minmax: fix up min3() and max3() too David Laight pointed out that we should deal with the min3() and max3() mess too, which still does excessive expansion. And our current macros are actually rather broken. In particular, the macros did this: #define min3(x, y, z) min((typeof(x))min(x, y), z) #define max3(x, y, z) max((typeof(x))max(x, y), z) and that not only is a nested expansion of possibly very complex arguments with all that involves, the typing with that "typeof()" cast is completely wrong. For example, imagine what happens in max3() if 'x' happens to be a 'unsigned char', but 'y' and 'z' are 'unsigned long'. The types are compatible, and there's no warning - but the result is just random garbage. No, I don't think we've ever hit that issue in practice, but since we now have sane infrastructure for doing this right, let's just use it. It fixes any excessive expansion, and also avoids these kinds of broken type issues. Requested-by: David Laight Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 41da6f85a407..98008dd92153 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -152,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -166,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero -- cgit v1.2.3 From ab03125268679e058e1e7b6612f6d12610761769 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 15 Jul 2024 11:00:34 -0400 Subject: cgroup: Show # of subsystem CSSes in cgroup.stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cgroup subsystem state (CSS) is an abstraction in the cgroup layer to help manage different structures in various cgroup subsystems by being an embedded element inside a larger structure like cpuset or mem_cgroup. The /proc/cgroups file shows the number of cgroups for each of the subsystems. With cgroup v1, the number of CSSes is the same as the number of cgroups. That is not the case anymore with cgroup v2. The /proc/cgroups file cannot show the actual number of CSSes for the subsystems that are bound to cgroup v2. 
So if a v2 cgroup subsystem is leaking cgroups (usually memory cgroup), we can't tell by looking at /proc/cgroups which cgroup subsystems may be responsible. As cgroup v2 had deprecated the use of /proc/cgroups, the hierarchical cgroup.stat file is now being extended to show the number of live and dying CSSes associated with all the non-inhibited cgroup subsystems that have been bound to cgroup v2. The number includes CSSes in the current cgroup as well as in all the descendants underneath it. This will help us pinpoint which subsystems are responsible for the increasing number of dying (nr_dying_descendants) cgroups. The CSSes dying counts are stored in the cgroup structure itself instead of inside the CSS as suggested by Johannes. This will allow us to accurately track dying counts of cgroup subsystems that have recently been disabled in a cgroup. It is now possible that a zero subsystem number is coupled with a non-zero dying subsystem number. The cgroup-v2.rst file is updated to discuss this new behavior. With this patch applied, a sample output from root cgroup.stat file was shown below. nr_descendants 56 nr_subsys_cpuset 1 nr_subsys_cpu 43 nr_subsys_io 43 nr_subsys_memory 56 nr_subsys_perf_event 57 nr_subsys_hugetlb 1 nr_subsys_pids 56 nr_subsys_rdma 1 nr_subsys_misc 1 nr_dying_descendants 30 nr_dying_subsys_cpuset 0 nr_dying_subsys_cpu 0 nr_dying_subsys_io 0 nr_dying_subsys_memory 30 nr_dying_subsys_perf_event 0 nr_dying_subsys_hugetlb 0 nr_dying_subsys_pids 0 nr_dying_subsys_rdma 0 nr_dying_subsys_misc 0 Another sample output from system.slice/cgroup.stat was: nr_descendants 34 nr_subsys_cpuset 0 nr_subsys_cpu 32 nr_subsys_io 32 nr_subsys_memory 34 nr_subsys_perf_event 35 nr_subsys_hugetlb 0 nr_subsys_pids 34 nr_subsys_rdma 0 nr_subsys_misc 0 nr_dying_descendants 30 nr_dying_subsys_cpuset 0 nr_dying_subsys_cpu 0 nr_dying_subsys_io 0 nr_dying_subsys_memory 30 nr_dying_subsys_perf_event 0 nr_dying_subsys_hugetlb 0 nr_dying_subsys_pids 0 nr_dying_subsys_rdma 0 nr_dying_subsys_misc 0 Note that 'debug' controller wasn't used to provide this information because the controller is not recommended in productions kernels, also many of them won't enable CONFIG_CGROUP_DEBUG by default. Similar information could be retrieved with debuggers like drgn but that's also not always available (e.g. lockdown) and the additional cost of runtime tracking here is deemed marginal. tj: Added Michal's paragraphs on why this is not added the debug controller to the commit message. Signed-off-by: Waiman Long Acked-by: Johannes Weiner Acked-by: Roman Gushchin Reviewed-by: Kamalesh Babulal Cc: Michal Koutný Link: http://lkml.kernel.org/r/20240715150034.2583772-1-longman@redhat.com Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae04035b6cbe..eb0f6f349496 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -210,6 +210,14 @@ struct cgroup_subsys_state { * fields of the containing structure. */ struct cgroup_subsys_state *parent; + + /* + * Keep track of total numbers of visible descendant CSSes. + * The total number of dying CSSes is tracked in + * css->cgroup->nr_dying_subsys[ssid]. + * Protected by cgroup_mutex. 
+ */ + int nr_descendants; }; /* @@ -470,6 +478,12 @@ struct cgroup { /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * Keep track of total number of dying CSSes at and below this cgroup. + * Protected by cgroup_mutex. + */ + int nr_dying_subsys[CGROUP_SUBSYS_COUNT]; + struct cgroup_root *root; /* -- cgit v1.2.3 From f81489a136ac2c18a7b9bb22cf37c0a6f6d58545 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 12 Jul 2024 13:57:13 -0700 Subject: hwmon: (max6697) Drop platform data support Platform data is not used anywhere in the upstream kernel. Drop support for it to simplify code maintenance. Reviewed-by: Tzung-Bi Shih Signed-off-by: Guenter Roeck --- include/linux/platform_data/max6697.h | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/platform_data/max6697.h (limited to 'include') diff --git a/include/linux/platform_data/max6697.h b/include/linux/platform_data/max6697.h deleted file mode 100644 index 6fbb70005541..000000000000 --- a/include/linux/platform_data/max6697.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * max6697.h - * Copyright (c) 2012 Guenter Roeck - */ - -#ifndef MAX6697_H -#define MAX6697_H - -#include - -/* - * For all bit masks: - * bit 0: local temperature - * bit 1..7: remote temperatures - */ -struct max6697_platform_data { - bool smbus_timeout_disable; /* set to disable SMBus timeouts */ - bool extended_range_enable; /* set to enable extended temp range */ - bool beta_compensation; /* set to enable beta compensation */ - u8 alert_mask; /* set bit to 1 to disable alert */ - u8 over_temperature_mask; /* set bit to 1 to disable */ - u8 resistance_cancellation; /* set bit to 0 to disable - * bit mask for MAX6581, - * boolean for other chips - */ - u8 ideality_mask; /* set bit to 0 to disable */ - u8 ideality_value; /* transistor ideality as per - * MAX6581 datasheet - */ -}; - -#endif /* MAX6697_H */ -- cgit v1.2.3 From 298dec19bdeb6e33ac220502504d969272b50cf6 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 31 Jul 2024 00:14:36 -0500 Subject: scx: Allow calling sleepable kfuncs from BPF_PROG_TYPE_SYSCALL We currently only allow calling sleepable scx kfuncs (i.e. scx_bpf_create_dsq()) from BPF_PROG_TYPE_STRUCT_OPS progs. The idea here was that we'd never have to call scx_bpf_create_dsq() outside of a sched_ext struct_ops callback, but that might not actually be true. For example, a scheduler could do something like the following: 1. Open and load (not yet attach) a scheduler skel 2. Synchronously call into a BPF_PROG_TYPE_SYSCALL prog from user space. For example, to initialize an LLC domain, or some other global, read-only state. 3. Attach the skel, which actually enables the scheduler The advantage of doing this is that it can preclude having to do pretty ugly boilerplate like initializing a read-only, statically sized array of u64[]'s which the kernel consumes literally once at init time to then create struct bpf_cpumask objects which are actually queried at runtime. Doing the above is already possible given that we can invoke core BPF kfuncs, such as bpf_cpumask_create(), from BPF_PROG_TYPE_SYSCALL progs. We already allow many scx kfuncs to be called from BPF_PROG_TYPE_SYSCALL progs (e.g. scx_bpf_kick_cpu()). Let's allow the sleepable kfuncs as well.
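For illustration only (not part of this patch), the pre-attach init step in point 2 above could be a minimal SEC("syscall") program along these lines; the program name, context struct and DSQ id are made-up placeholders, and scx/common.bpf.h is assumed to declare the kfunc:

	/* setup.bpf.c -- hypothetical pre-attach init program (sketch) */
	#include <scx/common.bpf.h>	/* assumed to declare scx_bpf_create_dsq() */

	struct init_ctx {
		u64 dsq_id;		/* illustrative: DSQ to create before attach */
	};

	SEC("syscall")
	int setup_dsqs(struct init_ctx *ctx)
	{
		/* sleepable kfunc, now callable from a BPF_PROG_TYPE_SYSCALL prog */
		return scx_bpf_create_dsq(ctx->dsq_id, -1 /* NUMA_NO_NODE */);
	}

User space would run this once via bpf_prog_test_run_opts() between skeleton load and attach.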
Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 593d2f4909dd..26e1c33bc844 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -106,16 +106,14 @@ enum scx_ent_dsq_flags { * mechanism. See scx_kf_allow(). */ enum scx_kf_mask { - SCX_KF_UNLOCKED = 0, /* not sleepable, not rq locked */ - /* all non-sleepables may be nested inside SLEEPABLE */ - SCX_KF_SLEEPABLE = 1 << 0, /* sleepable init operations */ + SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ - SCX_KF_CPU_RELEASE = 1 << 1, /* ops.cpu_release() */ + SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ /* ops.dequeue (in REST) may be nested inside DISPATCH */ - SCX_KF_DISPATCH = 1 << 2, /* ops.dispatch() */ - SCX_KF_ENQUEUE = 1 << 3, /* ops.enqueue() and ops.select_cpu() */ - SCX_KF_SELECT_CPU = 1 << 4, /* ops.select_cpu() */ - SCX_KF_REST = 1 << 5, /* other rq-locked operations */ + SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */ + SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 4, /* other rq-locked operations */ __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, -- cgit v1.2.3 From be72a57527fde6c80061c5f9d0e28762eb817b03 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Wed, 24 Jul 2024 10:06:58 +0800 Subject: lsm: Refactor return value of LSM hook vm_enough_memory To be consistent with most LSM hooks, convert the return value of hook vm_enough_memory to 0 or a negative error code. Before: - Hook vm_enough_memory returns 1 if permission is granted, 0 if not. - LSM_RET_DEFAULT(vm_enough_memory_mm) is 1. After: - Hook vm_enough_memory returns 0 if permission is granted, negative error code if not. - LSM_RET_DEFAULT(vm_enough_memory_mm) is 0.
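As a rough sketch (not taken from this patch), an LSM hook written against the new convention would look something like the following, with the CAP_SYS_ADMIN check standing in for whatever policy a module actually applies:

	static int example_vm_enough_memory(struct mm_struct *mm, long pages)
	{
		if (capable(CAP_SYS_ADMIN))
			return 0;	/* permission granted */

		return -EPERM;		/* denied: negative error code, no longer 0 */
	}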
Signed-off-by: Xu Kuohai Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 12c81be78eb9..63e2656d1d56 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) diff --git a/include/linux/security.h b/include/linux/security.h index 1390f1efb4f0..62233fec8ead 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -634,7 +634,7 @@ static inline int security_settime64(const struct timespec64 *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); + return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) -- cgit v1.2.3 From 39b5ffc9554366122dbe5a4088a7c8798068ee9f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 30 Jul 2024 20:24:38 -0700 Subject: dt-bindings: clock: qcom: Add missing USB MP resets The USB multiport controller needs a few missing resets, describe them in the binding. Acked-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240730-sc8180x-usb-mp-v2-1-a7dc4265b553@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sc8180x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h index 90c6e021a035..487b12c19db5 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h +++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h @@ -294,6 +294,10 @@ #define GCC_VIDEO_AXI0_CLK_BCR 42 #define GCC_VIDEO_AXI1_CLK_BCR 43 #define GCC_USB3_DP_PHY_SEC_BCR 44 +#define GCC_USB3_UNIPHY_MP0_BCR 45 +#define GCC_USB3_UNIPHY_MP1_BCR 46 +#define GCC_USB3UNIPHY_PHY_MP0_BCR 47 +#define GCC_USB3UNIPHY_PHY_MP1_BCR 48 /* GCC GDSCRs */ #define EMAC_GDSC 0 -- cgit v1.2.3 From 44933cd06e34e9838fa0d6c8ed16a3632d49849b Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Wed, 31 Jul 2024 11:59:14 +0530 Subject: dt-bindings: clock: qcom: Add SM8150 camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SM8150 platform. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20240731062916.2680823-7-quic_skakitap@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8150-camcc.h | 135 ++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm8150-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8150-camcc.h b/include/dt-bindings/clock/qcom,sm8150-camcc.h new file mode 100644 index 000000000000..5444035efa93 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8150-camcc.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8150_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8150_H + +/* CAM_CC clocks */ +#define CAM_CC_PLL0 0 +#define CAM_CC_PLL0_OUT_EVEN 1 +#define CAM_CC_PLL0_OUT_ODD 2 +#define CAM_CC_PLL1 3 +#define CAM_CC_PLL1_OUT_EVEN 4 +#define CAM_CC_PLL2 5 +#define CAM_CC_PLL2_OUT_MAIN 6 +#define CAM_CC_PLL3 7 +#define CAM_CC_PLL3_OUT_EVEN 8 +#define CAM_CC_PLL4 9 +#define CAM_CC_PLL4_OUT_EVEN 10 +#define CAM_CC_BPS_AHB_CLK 11 +#define CAM_CC_BPS_AREG_CLK 12 +#define CAM_CC_BPS_AXI_CLK 13 +#define CAM_CC_BPS_CLK 14 +#define CAM_CC_BPS_CLK_SRC 15 +#define CAM_CC_CAMNOC_AXI_CLK 16 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 17 +#define CAM_CC_CAMNOC_DCD_XO_CLK 18 +#define CAM_CC_CCI_0_CLK 19 +#define CAM_CC_CCI_0_CLK_SRC 20 +#define CAM_CC_CCI_1_CLK 21 +#define CAM_CC_CCI_1_CLK_SRC 22 +#define CAM_CC_CORE_AHB_CLK 23 +#define CAM_CC_CPAS_AHB_CLK 24 +#define CAM_CC_CPHY_RX_CLK_SRC 25 +#define CAM_CC_CSI0PHYTIMER_CLK 26 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 27 +#define CAM_CC_CSI1PHYTIMER_CLK 28 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 29 +#define CAM_CC_CSI2PHYTIMER_CLK 30 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 31 +#define CAM_CC_CSI3PHYTIMER_CLK 32 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 33 +#define CAM_CC_CSIPHY0_CLK 34 +#define CAM_CC_CSIPHY1_CLK 35 +#define CAM_CC_CSIPHY2_CLK 36 +#define CAM_CC_CSIPHY3_CLK 37 +#define CAM_CC_FAST_AHB_CLK_SRC 38 +#define CAM_CC_FD_CORE_CLK 39 +#define CAM_CC_FD_CORE_CLK_SRC 40 +#define CAM_CC_FD_CORE_UAR_CLK 41 +#define CAM_CC_GDSC_CLK 42 +#define CAM_CC_ICP_AHB_CLK 43 +#define CAM_CC_ICP_CLK 44 +#define CAM_CC_ICP_CLK_SRC 45 +#define CAM_CC_IFE_0_AXI_CLK 46 +#define CAM_CC_IFE_0_CLK 47 +#define CAM_CC_IFE_0_CLK_SRC 48 +#define CAM_CC_IFE_0_CPHY_RX_CLK 49 +#define CAM_CC_IFE_0_CSID_CLK 50 +#define CAM_CC_IFE_0_CSID_CLK_SRC 51 +#define CAM_CC_IFE_0_DSP_CLK 52 +#define CAM_CC_IFE_1_AXI_CLK 53 +#define CAM_CC_IFE_1_CLK 54 +#define CAM_CC_IFE_1_CLK_SRC 55 +#define CAM_CC_IFE_1_CPHY_RX_CLK 56 +#define CAM_CC_IFE_1_CSID_CLK 57 +#define CAM_CC_IFE_1_CSID_CLK_SRC 58 +#define CAM_CC_IFE_1_DSP_CLK 59 +#define CAM_CC_IFE_LITE_0_CLK 60 +#define CAM_CC_IFE_LITE_0_CLK_SRC 61 +#define CAM_CC_IFE_LITE_0_CPHY_RX_CLK 62 +#define CAM_CC_IFE_LITE_0_CSID_CLK 63 +#define CAM_CC_IFE_LITE_0_CSID_CLK_SRC 64 +#define CAM_CC_IFE_LITE_1_CLK 65 +#define CAM_CC_IFE_LITE_1_CLK_SRC 66 +#define CAM_CC_IFE_LITE_1_CPHY_RX_CLK 67 +#define CAM_CC_IFE_LITE_1_CSID_CLK 68 +#define CAM_CC_IFE_LITE_1_CSID_CLK_SRC 69 +#define CAM_CC_IPE_0_AHB_CLK 70 +#define CAM_CC_IPE_0_AREG_CLK 71 +#define CAM_CC_IPE_0_AXI_CLK 72 +#define CAM_CC_IPE_0_CLK 73 +#define CAM_CC_IPE_0_CLK_SRC 74 +#define CAM_CC_IPE_1_AHB_CLK 75 +#define CAM_CC_IPE_1_AREG_CLK 76 +#define CAM_CC_IPE_1_AXI_CLK 77 +#define CAM_CC_IPE_1_CLK 78 +#define 
CAM_CC_JPEG_CLK 79 +#define CAM_CC_JPEG_CLK_SRC 80 +#define CAM_CC_LRME_CLK 81 +#define CAM_CC_LRME_CLK_SRC 82 +#define CAM_CC_MCLK0_CLK 83 +#define CAM_CC_MCLK0_CLK_SRC 84 +#define CAM_CC_MCLK1_CLK 85 +#define CAM_CC_MCLK1_CLK_SRC 86 +#define CAM_CC_MCLK2_CLK 87 +#define CAM_CC_MCLK2_CLK_SRC 88 +#define CAM_CC_MCLK3_CLK 89 +#define CAM_CC_MCLK3_CLK_SRC 90 +#define CAM_CC_SLOW_AHB_CLK_SRC 91 + +/* CAM_CC power domains */ +#define TITAN_TOP_GDSC 0 +#define BPS_GDSC 1 +#define IFE_0_GDSC 2 +#define IFE_1_GDSC 3 +#define IPE_0_GDSC 4 +#define IPE_1_GDSC 5 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CCI_BCR 2 +#define CAM_CC_CPAS_BCR 3 +#define CAM_CC_CSI0PHY_BCR 4 +#define CAM_CC_CSI1PHY_BCR 5 +#define CAM_CC_CSI2PHY_BCR 6 +#define CAM_CC_CSI3PHY_BCR 7 +#define CAM_CC_FD_BCR 8 +#define CAM_CC_ICP_BCR 9 +#define CAM_CC_IFE_0_BCR 10 +#define CAM_CC_IFE_1_BCR 11 +#define CAM_CC_IFE_LITE_0_BCR 12 +#define CAM_CC_IFE_LITE_1_BCR 13 +#define CAM_CC_IPE_0_BCR 14 +#define CAM_CC_IPE_1_BCR 15 +#define CAM_CC_JPEG_BCR 16 +#define CAM_CC_LRME_BCR 17 +#define CAM_CC_MCLK0_BCR 18 +#define CAM_CC_MCLK1_BCR 19 +#define CAM_CC_MCLK2_BCR 20 +#define CAM_CC_MCLK3_BCR 21 + +#endif -- cgit v1.2.3 From 99447ef003d199329c0a1890f54958fcd10f7063 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:34 +0300 Subject: dt-bindings: clock: qcom,sm8650-dispcc: replace with symlink The display clock controller indices for SM8650 and SM8550 are completely equal. Replace the header file for qcom,sm8650-dispcc with the symlink to the qcom,sm8550-dispcc header file. Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-7-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8650-dispcc.h | 103 +------------------------ 1 file changed, 1 insertion(+), 102 deletions(-) mode change 100644 => 120000 include/dt-bindings/clock/qcom,sm8650-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8650-dispcc.h b/include/dt-bindings/clock/qcom,sm8650-dispcc.h deleted file mode 100644 index b0a668b395a5..000000000000 --- a/include/dt-bindings/clock/qcom,sm8650-dispcc.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ -/* - * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved - * Copyright (c) 2023, Linaro Ltd. 
- */ - -#ifndef _DT_BINDINGS_CLK_QCOM_SM8650_DISP_CC_H -#define _DT_BINDINGS_CLK_QCOM_SM8650_DISP_CC_H - -/* DISP_CC clocks */ -#define DISP_CC_MDSS_ACCU_CLK 0 -#define DISP_CC_MDSS_AHB1_CLK 1 -#define DISP_CC_MDSS_AHB_CLK 2 -#define DISP_CC_MDSS_AHB_CLK_SRC 3 -#define DISP_CC_MDSS_BYTE0_CLK 4 -#define DISP_CC_MDSS_BYTE0_CLK_SRC 5 -#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 6 -#define DISP_CC_MDSS_BYTE0_INTF_CLK 7 -#define DISP_CC_MDSS_BYTE1_CLK 8 -#define DISP_CC_MDSS_BYTE1_CLK_SRC 9 -#define DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 10 -#define DISP_CC_MDSS_BYTE1_INTF_CLK 11 -#define DISP_CC_MDSS_DPTX0_AUX_CLK 12 -#define DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 13 -#define DISP_CC_MDSS_DPTX0_CRYPTO_CLK 14 -#define DISP_CC_MDSS_DPTX0_LINK_CLK 15 -#define DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 16 -#define DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 17 -#define DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 18 -#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK 19 -#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 20 -#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK 21 -#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 22 -#define DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 23 -#define DISP_CC_MDSS_DPTX1_AUX_CLK 24 -#define DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 25 -#define DISP_CC_MDSS_DPTX1_CRYPTO_CLK 26 -#define DISP_CC_MDSS_DPTX1_LINK_CLK 27 -#define DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 28 -#define DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 29 -#define DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 30 -#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK 31 -#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 32 -#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK 33 -#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 34 -#define DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 35 -#define DISP_CC_MDSS_DPTX2_AUX_CLK 36 -#define DISP_CC_MDSS_DPTX2_AUX_CLK_SRC 37 -#define DISP_CC_MDSS_DPTX2_CRYPTO_CLK 38 -#define DISP_CC_MDSS_DPTX2_LINK_CLK 39 -#define DISP_CC_MDSS_DPTX2_LINK_CLK_SRC 40 -#define DISP_CC_MDSS_DPTX2_LINK_DIV_CLK_SRC 41 -#define DISP_CC_MDSS_DPTX2_LINK_INTF_CLK 42 -#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK 43 -#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC 44 -#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK 45 -#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK_SRC 46 -#define DISP_CC_MDSS_DPTX3_AUX_CLK 47 -#define DISP_CC_MDSS_DPTX3_AUX_CLK_SRC 48 -#define DISP_CC_MDSS_DPTX3_CRYPTO_CLK 49 -#define DISP_CC_MDSS_DPTX3_LINK_CLK 50 -#define DISP_CC_MDSS_DPTX3_LINK_CLK_SRC 51 -#define DISP_CC_MDSS_DPTX3_LINK_DIV_CLK_SRC 52 -#define DISP_CC_MDSS_DPTX3_LINK_INTF_CLK 53 -#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK 54 -#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC 55 -#define DISP_CC_MDSS_ESC0_CLK 56 -#define DISP_CC_MDSS_ESC0_CLK_SRC 57 -#define DISP_CC_MDSS_ESC1_CLK 58 -#define DISP_CC_MDSS_ESC1_CLK_SRC 59 -#define DISP_CC_MDSS_MDP1_CLK 60 -#define DISP_CC_MDSS_MDP_CLK 61 -#define DISP_CC_MDSS_MDP_CLK_SRC 62 -#define DISP_CC_MDSS_MDP_LUT1_CLK 63 -#define DISP_CC_MDSS_MDP_LUT_CLK 64 -#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 65 -#define DISP_CC_MDSS_PCLK0_CLK 66 -#define DISP_CC_MDSS_PCLK0_CLK_SRC 67 -#define DISP_CC_MDSS_PCLK1_CLK 68 -#define DISP_CC_MDSS_PCLK1_CLK_SRC 69 -#define DISP_CC_MDSS_RSCC_AHB_CLK 70 -#define DISP_CC_MDSS_RSCC_VSYNC_CLK 71 -#define DISP_CC_MDSS_VSYNC1_CLK 72 -#define DISP_CC_MDSS_VSYNC_CLK 73 -#define DISP_CC_MDSS_VSYNC_CLK_SRC 74 -#define DISP_CC_PLL0 75 -#define DISP_CC_PLL1 76 -#define DISP_CC_SLEEP_CLK 77 -#define DISP_CC_SLEEP_CLK_SRC 78 -#define DISP_CC_XO_CLK 79 -#define DISP_CC_XO_CLK_SRC 80 - -/* DISP_CC resets */ -#define DISP_CC_MDSS_CORE_BCR 0 -#define DISP_CC_MDSS_CORE_INT2_BCR 1 -#define DISP_CC_MDSS_RSCC_BCR 2 - -/* DISP_CC GDSCR */ -#define MDSS_GDSC 0 -#define 
MDSS_INT2_GDSC 1 - -#endif diff --git a/include/dt-bindings/clock/qcom,sm8650-dispcc.h b/include/dt-bindings/clock/qcom,sm8650-dispcc.h new file mode 120000 index 000000000000..c0a291188f28 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8650-dispcc.h @@ -0,0 +1 @@ +qcom,sm8550-dispcc.h \ No newline at end of file -- cgit v1.2.3 From c9c0ee5f20c593faf289fa8850c3ed84031dd12a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 29 Jul 2024 03:47:40 -0700 Subject: net: skbuff: Skip early return in skb_unref when debugging This patch modifies the skb_unref function to skip the early return optimization when CONFIG_DEBUG_NET is enabled. The change ensures that the reference count decrement always occurs in debug builds, allowing for more thorough checking of SKB reference counting. Previously, when the SKB's reference count was 1 and CONFIG_DEBUG_NET was not set, the function would return early after a memory barrier (smp_rmb()) without decrementing the reference count. This optimization assumes it's safe to proceed with freeing the SKB without the overhead of an atomic decrement from 1 to 0. With this change: - In non-debug builds (CONFIG_DEBUG_NET not set), behavior remains unchanged, preserving the performance optimization. - In debug builds (CONFIG_DEBUG_NET set), the reference count is always decremented, even when it's 1, allowing for consistent behavior and potentially catching subtle SKB management bugs. This modification enhances debugging capabilities for networking code without impacting performance in production kernels. It helps kernel developers identify and diagnose issues related to SKB management and reference counting in the network stack. Cc: Chris Mason Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240729104741.370327-1-leitao@debian.org Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..cf8f6ce06742 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1225,7 +1225,7 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(refcount_read(&skb->users) == 1)) + if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; -- cgit v1.2.3 From e469e2045f1b78418f39a2461a9fdf73c2789a1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 1 Aug 2024 08:48:06 +0200 Subject: ALSA: memalloc: Let IOMMU handle S/G primarily The recent changes in IOMMU made the non-contiguous page allocations as default, hence we can simply use the standard DMA allocation for the S/G pages as well. In this patch, we simplify the code by trying the standard DMA allocation at first, instead of dma_alloc_noncontiguous(). For the case without IOMMU, we still need to manage the S/G pages manually, so we keep the same fallback routines like before. The fallback types (SNDRV_DMA_TYPE_DEV_SG_FALLBACK & co) are dropped / folded into SNDRV_DMA_TYPE_DEV_SG and co now. The allocation via the standard DMA call overrides the type accordingly, hence we don't have to have extra fallback types any longer. OTOH, SNDRV_DMA_TYPE_DEV_SG is no longer an alias but became its own type back again. Note that this patch requires another prerequisite fix for memmalloc helper to use the DMA API for WC pages on x86. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219087 Link: https://patch.msgid.link/20240801064808.31205-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 43d524580bd2..9dd475cf4e8c 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -42,17 +42,12 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_NONCONTIG 8 /* non-coherent SG buffer */ #define SNDRV_DMA_TYPE_NONCOHERENT 9 /* non-coherent buffer */ #ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_NONCONTIG +#define SNDRV_DMA_TYPE_DEV_SG 3 /* S/G pages */ #define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ #else #define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ #define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC #endif -/* fallback types, don't use those directly */ -#ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK 10 -#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK 11 -#endif /* * info for buffer allocation -- cgit v1.2.3 From 469b77e421b92ce4662f6f1a47f1e7af9dbd14bd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 30 Jul 2024 02:05:12 +0000 Subject: ALSA: trace: use snd_pcm_direction_name() We already have snd_pcm_direction_name(). Let's use it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87sevrk52f.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- include/trace/events/asoc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 202fc3680c36..6696dbcc2b96 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -8,6 +8,7 @@ #include #include #include +#include #define DAPM_DIRECT "(direct)" #define DAPM_ARROW(dir) (((dir) == SND_SOC_DAPM_DIR_OUT) ? "->" : "<-") @@ -212,7 +213,7 @@ TRACE_EVENT(snd_soc_dapm_connected, ), TP_printk("%s: found %d paths", - __entry->stream ? "capture" : "playback", __entry->paths) + snd_pcm_direction_name(__entry->stream), __entry->paths) ); TRACE_EVENT(snd_soc_jack_irq, -- cgit v1.2.3 From d39388e6555cb805beb985a7ece1c771c611037c Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:22 +0530 Subject: ASoC: intel/sdw-utils: move soundwire machine driver soc ops Move Intel SoundWire generic machine driver soc ops to common place so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-8-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/sound/soc_sdw_utils.h (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h new file mode 100644 index 000000000000..cf4cdb66b2de --- /dev/null +++ b/include/sound/soc_sdw_utils.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * This file incorporates work covered by the following copyright notice: + * Copyright (c) 2020 Intel Corporation + * Copyright(c) 2024 Advanced Micro Devices, Inc. 
+ * + */ + +#ifndef SOC_SDW_UTILS_H +#define SOC_SDW_UTILS_H + +#include + +int asoc_sdw_startup(struct snd_pcm_substream *substream); +int asoc_sdw_prepare(struct snd_pcm_substream *substream); +int asoc_sdw_prepare(struct snd_pcm_substream *substream); +int asoc_sdw_trigger(struct snd_pcm_substream *substream, int cmd); +int asoc_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params); +int asoc_sdw_hw_free(struct snd_pcm_substream *substream); +void asoc_sdw_shutdown(struct snd_pcm_substream *substream); + +#endif -- cgit v1.2.3 From 73619137c633a89a90f8c8c5fa59171aaff6e913 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:23 +0530 Subject: ASoC: intel: move soundwire machine driver common structures Move intel generic SoundWire machine driver common structures to soc_sdw_utils.h file. These structures will be used in other platform SoundWire machine driver code. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-9-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index cf4cdb66b2de..1ae5523bbcf8 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -11,6 +11,49 @@ #include +#define SOC_SDW_MAX_DAI_NUM 8 + +struct asoc_sdw_codec_info; + +struct asoc_sdw_dai_info { + const bool direction[2]; /* playback & capture support */ + const char *dai_name; + const int dai_type; + const int dailink[2]; /* dailink id for each direction */ + const struct snd_kcontrol_new *controls; + const int num_controls; + const struct snd_soc_dapm_widget *widgets; + const int num_widgets; + int (*init)(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + int (*exit)(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + int (*rtd_init)(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); + bool rtd_init_done; /* Indicate that the rtd_init callback is done */ + unsigned long quirk; +}; + +struct asoc_sdw_codec_info { + const int part_id; + const int version_id; + const char *codec_name; + int amp_num; + const u8 acpi_id[ACPI_ID_LEN]; + const bool ignore_internal_dmic; + const struct snd_soc_ops *ops; + struct asoc_sdw_dai_info dais[SOC_SDW_MAX_DAI_NUM]; + const int dai_num; + + int (*codec_card_late_probe)(struct snd_soc_card *card); + + int (*count_sidecar)(struct snd_soc_card *card, + int *num_dais, int *num_devs); + int (*add_sidecar)(struct snd_soc_card *card, + struct snd_soc_dai_link **dai_links, + struct snd_soc_codec_conf **codec_conf); +}; + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); -- cgit v1.2.3 From 941d6933eb31b13d09a7293bc92d9d494513a372 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:24 +0530 Subject: ASoC: intel/sdw_utils: move soundwire machine driver helper functions Move below Intel SoundWire machine driver helper functions to soc_sdw_utils.c file so that it can be used by other platform machine driver. 
- asoc_sdw_is_unique_device() - asoc_sdw_get_codec_name() Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-10-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 1ae5523bbcf8..7ca3a6afdfb1 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -10,6 +10,7 @@ #define SOC_SDW_UTILS_H #include +#include #define SOC_SDW_MAX_DAI_NUM 8 @@ -63,4 +64,8 @@ int asoc_sdw_hw_params(struct snd_pcm_substream *substream, int asoc_sdw_hw_free(struct snd_pcm_substream *substream); void asoc_sdw_shutdown(struct snd_pcm_substream *substream); +const char *asoc_sdw_get_codec_name(struct device *dev, + const struct asoc_sdw_codec_info *codec_info, + const struct snd_soc_acpi_link_adr *adr_link, + int adr_index); #endif -- cgit v1.2.3 From 4776d0c9088634677749e42ae8b36b4da46226db Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:25 +0530 Subject: ASoC: intel/sdw_utils: move dmic codec helper function Move generic dmic codec helper function implementation to sdw_utils folder so that this function can be used by other platform machine drivers. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-11-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 7ca3a6afdfb1..0ffbd9847532 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -68,4 +68,8 @@ const char *asoc_sdw_get_codec_name(struct device *dev, const struct asoc_sdw_codec_info *codec_info, const struct snd_soc_acpi_link_adr *adr_link, int adr_index); + +/* DMIC support */ +int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); + #endif -- cgit v1.2.3 From a9831fd1c0e6353366bbde6e1dd7b15a2dd6d825 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:26 +0530 Subject: ASoC: intel/sdw_utils: move rtk dmic helper functions Move rtk SoundWire dmic helper functions implementation to sdw_utils folder to make it generic. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-12-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 0ffbd9847532..9fa102fc03c3 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -72,4 +72,7 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* dai_link init callbacks */ +int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); + #endif -- cgit v1.2.3 From 09c60bc9da91f188e2aedb0bac94d3e129fa20f9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:27 +0530 Subject: ASoC: intel/sdw_utils: move rt712 sdca helper functions Move RT712 SDCA codec helper file to sdw_utils folder so that these helper functions can be used by other platform machine drivers. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-13-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9fa102fc03c3..6fd305253e2a 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -74,5 +74,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 89b3456e9afa4c61879f6d744f6d2c6fadc2b2fc Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:28 +0530 Subject: ASoC: intel/sdw_utils: move rt722 sdca helper functions Move RT722 SDCA codec helper file to sdw_utils folder to make it generic. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-14-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 6fd305253e2a..5bc2a89bced4 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -75,5 +75,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 4f54856b4ea460e9048c11e3f698c38fb072529d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:29 +0530 Subject: ASoC: intel: split soundwire machine driver private data Split intel generic SoundWire machine driver private data into two structures. 
One structure is generic one which can be used by other platform machine driver and the other one is intel specific one. Move generic machine driver private data to soc_sdw_utils.h. Define a void pointer in generic machine driver private data structure and assign the vendor specific structure in mc_probe() call. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-15-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 5bc2a89bced4..eb713cdf4079 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -55,6 +55,16 @@ struct asoc_sdw_codec_info { struct snd_soc_codec_conf **codec_conf); }; +struct asoc_sdw_mc_private { + struct snd_soc_card card; + struct snd_soc_jack sdw_headset; + struct device *headset_codec_dev; /* only one headset per card */ + struct device *amp_dev1, *amp_dev2; + bool append_dai_type; + bool ignore_internal_dmic; + void *private; +}; + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); -- cgit v1.2.3 From 139e17740200d8e92677942bfee6c8cfe4da3009 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:30 +0530 Subject: ASoC: intel/sdw_utils: move rt5682 codec helper function Move rt5682 sdw codec helper function to common place holder to make it generic. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-16-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index eb713cdf4079..ed97d78336da 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -86,5 +86,6 @@ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From da5b1831673208e235cadc9107207adb092c2eb9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:31 +0530 Subject: ASoC: intel/sdw_utils: move rtk jack common helper functions Move RTK codec jack common helper functions to common place holder (sdw_utils folder) to make it generic so that it will be used by other platform machine driver code. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-17-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index ed97d78336da..6b6bab8d3310 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -13,6 +13,8 @@ #include #define SOC_SDW_MAX_DAI_NUM 8 +#define SOC_SDW_MAX_NO_PROPS 2 +#define SOC_SDW_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) struct asoc_sdw_codec_info; @@ -63,6 +65,7 @@ struct asoc_sdw_mc_private { bool append_dai_type; bool ignore_internal_dmic; void *private; + unsigned long mc_quirk; }; int asoc_sdw_startup(struct snd_pcm_substream *substream); @@ -82,8 +85,16 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* RT711-SDCA support */ +int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From 8e84fd22dc425fd0b005a20156eeb67f019ae39f Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:32 +0530 Subject: ASoC: intel/sdw_utils: move rt700 and rt711 codec helper functions Move RT700 and RT711 Soundwire codec helper functions to common place holder so that it can be used by other platform machine driver. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-18-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 6b6bab8d3310..3e55cac33176 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -85,6 +85,13 @@ const char *asoc_sdw_get_codec_name(struct device *dev, /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); +/* RT711 support */ +int asoc_sdw_rt711_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt711_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* RT711-SDCA support */ int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, struct snd_soc_dai_link *dai_links, @@ -95,6 +102,8 @@ int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_lin /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt700_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From ccc96ae2814a7faad591af68bd0115e4d2b256b4 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:33 +0530 Subject: ASoC: intel/sdw_utils: move rtk amp codec helper functions Move RTK amp codec helper functions related implementation to common place holder to make it generic so that these helper functions will be used by other platform machine driver modules. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-19-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 3e55cac33176..450542eb3ea0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -99,9 +99,20 @@ int asoc_sdw_rt_sdca_jack_init(struct snd_soc_card *card, bool playback); int asoc_sdw_rt_sdca_jack_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); +/* RT1308 I2S support */ +extern const struct snd_soc_ops soc_sdw_rt1308_i2s_ops; + +/* generic amp support */ +int asoc_sdw_rt_amp_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_rt_amp_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt_amp_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt700_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -- cgit v1.2.3 From 5fa46627d5118bf58b80df45489f49e1e316473a Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:34 +0530 Subject: ASoC: intel/sdw_utils: move cirrus soundwire codec helper functions To make it generic, move Cirrus Soundwire codec helper functions to common place holder so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-20-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 450542eb3ea0..d5dd887b9d15 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -16,6 +16,19 @@ #define SOC_SDW_MAX_NO_PROPS 2 #define SOC_SDW_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) +/* If a CODEC has an optional speaker output, this quirk will enable it */ +#define SOC_SDW_CODEC_SPKR BIT(15) +/* + * If the CODEC has additional devices attached directly to it. 
+ * + * For the cs42l43: + * - 0 - No speaker output + * - SOC_SDW_CODEC_SPKR - CODEC internal speaker + * - SOC_SDW_SIDECAR_AMPS - 2x Sidecar amplifiers + CODEC internal speaker + * - SOC_SDW_CODEC_SPKR | SOF_SIDECAR_AMPS - Not currently supported + */ +#define SOC_SDW_SIDECAR_AMPS BIT(16) + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { @@ -109,6 +122,28 @@ int asoc_sdw_rt_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_rt_amp_exit(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); +/* CS42L43 support */ +int asoc_sdw_cs42l43_spk_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + +/* CS AMP support */ +int asoc_sdw_bridge_cs35l56_count_sidecar(struct snd_soc_card *card, + int *num_dais, int *num_devs); +int asoc_sdw_bridge_cs35l56_add_sidecar(struct snd_soc_card *card, + struct snd_soc_dai_link **dai_links, + struct snd_soc_codec_conf **codec_conf); +int asoc_sdw_bridge_cs35l56_spk_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + +int asoc_sdw_cs_amp_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); @@ -118,5 +153,10 @@ int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_hs_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs42l43_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 051b7cb3fde16fd8788078d697d84069b0e50e03 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 14:44:35 +0530 Subject: ASoC: intel/sdw_utils: move maxim codec helper functions Move maxim codec helper functions to common place holder so that it can be used by other platform machine driver. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801091446.10457-21-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index d5dd887b9d15..9e84d1ab6cd0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -144,6 +144,12 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, struct asoc_sdw_codec_info *info, bool playback); +/* MAXIM codec support */ +int asoc_sdw_maxim_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); + /* dai_link init callbacks */ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); @@ -158,5 +164,6 @@ int asoc_sdw_cs42l43_hs_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs42l43_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_maxim_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); #endif -- cgit v1.2.3 From 8f87e292a34813e4551e1513c62b7222900481cb Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:07 +0530 Subject: ASoC: intel/sdw_utils: move dai id common macros Move dai id common macros from intel SoundWire generic driver to soc_sdw_utils.h file so that it can be used by other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9e84d1ab6cd0..13941ddd24c8 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -29,6 +29,13 @@ */ #define SOC_SDW_SIDECAR_AMPS BIT(16) +#define SOC_SDW_UNUSED_DAI_ID -1 +#define SOC_SDW_JACK_OUT_DAI_ID 0 +#define SOC_SDW_JACK_IN_DAI_ID 1 +#define SOC_SDW_AMP_OUT_DAI_ID 2 +#define SOC_SDW_AMP_IN_DAI_ID 3 +#define SOC_SDW_DMIC_DAI_ID 4 + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { -- cgit v1.2.3 From 6e7af1fdf7da7f0b79475f573d3c1f49a826bd68 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:08 +0530 Subject: ASoC: intel/sdw_utils: move soundwire dai type macros Move SoundWire dai type macros to common header file(soc_sdw_util.h). So that these macros will be used by other platform machine driver. 
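As a rough sketch of how these ids are expected to be combined with the DAI-type macros added below and the shared structures (the part id and DAI name are illustrative assumptions, not an actual table entry from the driver):

	static struct asoc_sdw_codec_info example_codec_info = {
		.part_id = 0x712,				/* assumed part id */
		.dais = {
			{
				.direction = {true, true},	/* playback, capture */
				.dai_name = "rt712-sdca-aif1",	/* assumed DAI name */
				.dai_type = SOC_SDW_DAI_TYPE_JACK,
				.dailink = {SOC_SDW_JACK_OUT_DAI_ID, SOC_SDW_JACK_IN_DAI_ID},
				.init = asoc_sdw_rt_sdca_jack_init,
				.exit = asoc_sdw_rt_sdca_jack_exit,
				.rtd_init = asoc_sdw_rt_sdca_jack_rtd_init,
			},
		},
		.dai_num = 1,
	};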
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-2-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 13941ddd24c8..7912ff7d2bb9 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -36,6 +36,10 @@ #define SOC_SDW_AMP_IN_DAI_ID 3 #define SOC_SDW_DMIC_DAI_ID 4 +#define SOC_SDW_DAI_TYPE_JACK 0 +#define SOC_SDW_DAI_TYPE_AMP 1 +#define SOC_SDW_DAI_TYPE_MIC 2 + struct asoc_sdw_codec_info; struct asoc_sdw_dai_info { -- cgit v1.2.3 From e377c94773171e8a97e716e57ec67d49b02ded0b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:09 +0530 Subject: ASoC: intel/sdw_utils: move soundwire codec_info_list structure SoundWire 'codec_info_list' structure is not a platform specific one. Move codec_info_list structure to common file soc_sdw_utils.c. Move codec helper functions which uses codec_info_list structure to common place holder and rename the function by adding _sdw tag. This will allow to use 'codec_info_list' structure and it's helper functions in other platform machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-3-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 7912ff7d2bb9..9d99a460ba27 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -90,8 +90,12 @@ struct asoc_sdw_mc_private { bool ignore_internal_dmic; void *private; unsigned long mc_quirk; + int codec_info_list_count; }; +extern struct asoc_sdw_codec_info codec_info_list[]; +int asoc_sdw_get_codec_info_list_count(void); + int asoc_sdw_startup(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); int asoc_sdw_prepare(struct snd_pcm_substream *substream); @@ -106,6 +110,15 @@ const char *asoc_sdw_get_codec_name(struct device *dev, const struct snd_soc_acpi_link_adr *adr_link, int adr_index); +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_part(const u64 adr); + +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_acpi(const u8 *acpi_id); + +struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_dai(const char *dai_name, + int *dai_index); + +int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); + /* DMIC support */ int asoc_sdw_dmic_init(struct snd_soc_pcm_runtime *rtd); -- cgit v1.2.3 From 778dcb08832a5e526e447971f7ca72cb6dd2307b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:10 +0530 Subject: ASoC: intel/sdw_utils: move machine driver dai link helper functions Move machine driver dai link helper functions to common place holder, So that it can be used by other platform machine driver. Rename these functions with "asoc_sdw" tag as a prefix. 
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-4-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 9d99a460ba27..b3b6d6b7ce2f 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -117,6 +117,11 @@ struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_acpi(const u8 *acpi_id); struct asoc_sdw_codec_info *asoc_sdw_find_codec_info_dai(const char *dai_name, int *dai_index); +struct snd_soc_dai_link *asoc_sdw_mc_find_codec_dai_used(struct snd_soc_card *card, + const char *dai_name); + +void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 5bd414c7b80e39ef11bd86db76e726962c1bfc92 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:11 +0530 Subject: ASoC: sdw_utils: refactor sof_sdw_card_late_probe function Refactor sof_sdw_card_late_probe() function and derive a generic function soc_sdw_card_late_probe() function which can be used by SoundWire generic machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-5-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index b3b6d6b7ce2f..14e21a992f56 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -122,6 +122,8 @@ struct snd_soc_dai_link *asoc_sdw_mc_find_codec_dai_used(struct snd_soc_card *ca void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); +int asoc_sdw_card_late_probe(struct snd_soc_card *card); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 59f8b622d52e30c5be7f14212c26ca37e810ece9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:12 +0530 Subject: ASoC: intel/sdw_utils: refactor init_dai_link() and init_simple_dai_link() To make it generic, refactor existing implementation for init_dai_link() and init_simple_dai_link() as mentioned below. - Move init_dai_link() and init_simple_dai_link() to common place holder - Rename the functions with "asoc_sdw" as prefix. - Pass the platform specific 'platform_component' structure and its size as arguments for init_simple_dai_link() function and allocate one more extra dlc for platform component. - Pass the 'platform_component' and 'num_platforms' as arguments for init_dai_link(). 
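To make the new calling convention concrete, a caller in a platform machine driver might now look roughly like the sketch below; the DMIC names and the platform component string are placeholders, not values from this patch, and the full prototypes follow in the hunk below:

	ret = asoc_sdw_init_simple_dai_link(dev, dai_links, &be_id, "dmic01",
					    0, 1,		/* capture only */
					    "DMIC01 Pin",	/* assumed CPU DAI name */
					    platform_name,	/* platform component name */
					    1,			/* num_platforms */
					    "dmic-codec", "dmic-hifi",
					    asoc_sdw_dmic_init, NULL);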
Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-6-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 14e21a992f56..e366b7968c2d 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -124,6 +124,22 @@ void asoc_sdw_mc_dailink_exit_loop(struct snd_soc_card *card); int asoc_sdw_card_late_probe(struct snd_soc_card *card); +void asoc_sdw_init_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links, + int *be_id, char *name, int playback, int capture, + struct snd_soc_dai_link_component *cpus, int cpus_num, + struct snd_soc_dai_link_component *platform_component, + int num_platforms, struct snd_soc_dai_link_component *codecs, + int codecs_num, int (*init)(struct snd_soc_pcm_runtime *rtd), + const struct snd_soc_ops *ops); + +int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links, + int *be_id, char *name, int playback, int capture, + const char *cpu_dai_name, const char *platform_comp_name, + int num_platforms, const char *codec_name, + const char *codec_dai_name, + int (*init)(struct snd_soc_pcm_runtime *rtd), + const struct snd_soc_ops *ops); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 0b8f009ae92fa94ab3c0ca881fce02c336056a73 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 Aug 2024 16:48:13 +0530 Subject: ASoC: soc-acpi: add pci revision id field in mach params structure Few IP's may have same PCI vendor id and device id and different revision id. Add revision id field to the 'snd_soc_acpi_mach_params' so that using this field platform specific implementation can be added in machine driver. Link: https://github.com/thesofproject/linux/pull/5068 Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240801111821.18076-7-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 38ccec4e3fcd..b6d301946244 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -70,6 +70,7 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @dai_drivers: pointer to dai_drivers, used e.g. in nocodec mode * @subsystem_vendor: optional PCI SSID vendor value * @subsystem_device: optional PCI SSID device value + * @subsystem_rev: optional PCI SSID revision value * @subsystem_id_set: true if a value has been written to * subsystem_vendor and subsystem_device. */ @@ -86,6 +87,7 @@ struct snd_soc_acpi_mach_params { struct snd_soc_dai_driver *dai_drivers; unsigned short subsystem_vendor; unsigned short subsystem_device; + unsigned short subsystem_rev; bool subsystem_id_set; }; -- cgit v1.2.3 From f0fcdd2cb0db62605d85f3b97a1b443e7c91f886 Mon Sep 17 00:00:00 2001 From: John Allen Date: Tue, 30 Jul 2024 15:17:30 +0000 Subject: ACPI: PRM: Add PRM handler direct call support Platform Runtime Mechanism (PRM) handlers can be invoked from either the AML interpreter or directly by an OS driver. Implement the latter. [ bp: Massage commit message. 
] Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Reviewed-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20240730151731.15363-2-john.allen@amd.com --- include/linux/prmt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/prmt.h b/include/linux/prmt.h index 24da8364b919..9c094294403f 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -2,6 +2,11 @@ #ifdef CONFIG_ACPI_PRMT void init_prmt(void); +int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); #else static inline void init_prmt(void) { } +static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer) +{ + return -EOPNOTSUPP; +} #endif -- cgit v1.2.3 From 6b08d07cac64c0dcf1dbb4584bdf95d0f789a520 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 10 Jul 2024 12:06:51 +0200 Subject: leds: trigger: netdev: Add support for tx_err and rx_err notification with LEDs This patch provides support for enabling blinking of LEDs when RX or TX errors are detected. Approach taken in this patch is similar to one for TX or RX data transmission indication (i.e. TRIGGER_NETDEV_TX/RX attribute). One can inspect transmission errors with: ip -s link show eth0 Example LED configuration: cd /sys/devices/platform/amba_pl@0/a001a000.leds/leds/ echo netdev > mode:blue/trigger && \ echo eth0 > mode:blue/device_name && \ echo 1 > mode:blue/tx_err Signed-off-by: Lukasz Majewski Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240710100651.4059887-1-lukma@denx.de Signed-off-by: Lee Jones --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 6885603f211b..e5968c3ed4ae 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -611,6 +611,8 @@ enum led_trigger_netdev_modes { TRIGGER_NETDEV_FULL_DUPLEX, TRIGGER_NETDEV_TX, TRIGGER_NETDEV_RX, + TRIGGER_NETDEV_TX_ERR, + TRIGGER_NETDEV_RX_ERR, /* Keep last */ __TRIGGER_NETDEV_MAX, -- cgit v1.2.3 From 480a8ad683d7a54e8d38c9c7778fb6b15aac241a Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Jul 2024 15:10:58 +0300 Subject: mfd: adp5585: Add Analog Devices ADP5585 core support The ADP5585 is a 10/11 input/output port expander with a built in keypad matrix decoder, programmable logic, reset generator, and PWM generator. This driver supports the chip by modelling it as an MFD device, with two child devices for the GPIO and PWM functions. The driver is derived from an initial implementation from NXP, available in commit 8059835bee19 ("MLK-25917-1 mfd: adp5585: add ADI adp5585 core support") in their BSP kernel tree. It has been extensively rewritten. 
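As a loose illustration of the MFD split (not code from this series), a child cell such as the PWM driver could reach the shared regmap through its parent device; whether the core exports the struct adp5585_dev via parent drvdata as shown here is an assumption:

  #include <linux/mfd/adp5585.h>
  #include <linux/platform_device.h>
  #include <linux/regmap.h>

  static int adp5585_pwm_example_probe(struct platform_device *pdev)
  {
          /* Assumes the MFD core stored its struct adp5585_dev as parent drvdata. */
          struct adp5585_dev *adp5585 = dev_get_drvdata(pdev->dev.parent);

          /* Flip only the PWM enable bit; a real driver programs on/off times first. */
          return regmap_update_bits(adp5585->regmap, ADP5585_PWM_CFG,
                                    ADP5585_PWM_EN, ADP5585_PWM_EN);
  }
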
Signed-off-by: Haibo Chen Signed-off-by: Laurent Pinchart Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20240722121100.2855-3-laurent.pinchart@ideasonboard.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 126 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 include/linux/mfd/adp5585.h (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h new file mode 100644 index 000000000000..016033cd68e4 --- /dev/null +++ b/include/linux/mfd/adp5585.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Analog Devices ADP5585 I/O expander, PWM controller and keypad controller + * + * Copyright 2022 NXP + * Copyright 2024 Ideas on Board Oy + */ + +#ifndef __MFD_ADP5585_H_ +#define __MFD_ADP5585_H_ + +#include + +#define ADP5585_ID 0x00 +#define ADP5585_MAN_ID_VALUE 0x20 +#define ADP5585_MAN_ID_MASK GENMASK(7, 4) +#define ADP5585_INT_STATUS 0x01 +#define ADP5585_STATUS 0x02 +#define ADP5585_FIFO_1 0x03 +#define ADP5585_FIFO_2 0x04 +#define ADP5585_FIFO_3 0x05 +#define ADP5585_FIFO_4 0x06 +#define ADP5585_FIFO_5 0x07 +#define ADP5585_FIFO_6 0x08 +#define ADP5585_FIFO_7 0x09 +#define ADP5585_FIFO_8 0x0a +#define ADP5585_FIFO_9 0x0b +#define ADP5585_FIFO_10 0x0c +#define ADP5585_FIFO_11 0x0d +#define ADP5585_FIFO_12 0x0e +#define ADP5585_FIFO_13 0x0f +#define ADP5585_FIFO_14 0x10 +#define ADP5585_FIFO_15 0x11 +#define ADP5585_FIFO_16 0x12 +#define ADP5585_GPI_INT_STAT_A 0x13 +#define ADP5585_GPI_INT_STAT_B 0x14 +#define ADP5585_GPI_STATUS_A 0x15 +#define ADP5585_GPI_STATUS_B 0x16 +#define ADP5585_RPULL_CONFIG_A 0x17 +#define ADP5585_RPULL_CONFIG_B 0x18 +#define ADP5585_RPULL_CONFIG_C 0x19 +#define ADP5585_RPULL_CONFIG_D 0x1a +#define ADP5585_Rx_PULL_CFG_PU_300K 0 +#define ADP5585_Rx_PULL_CFG_PD_300K 1 +#define ADP5585_Rx_PULL_CFG_PU_100K 2 +#define ADP5585_Rx_PULL_CFG_DISABLE 3 +#define ADP5585_Rx_PULL_CFG_MASK 3 +#define ADP5585_GPI_INT_LEVEL_A 0x1b +#define ADP5585_GPI_INT_LEVEL_B 0x1c +#define ADP5585_GPI_EVENT_EN_A 0x1d +#define ADP5585_GPI_EVENT_EN_B 0x1e +#define ADP5585_GPI_INTERRUPT_EN_A 0x1f +#define ADP5585_GPI_INTERRUPT_EN_B 0x20 +#define ADP5585_DEBOUNCE_DIS_A 0x21 +#define ADP5585_DEBOUNCE_DIS_B 0x22 +#define ADP5585_GPO_DATA_OUT_A 0x23 +#define ADP5585_GPO_DATA_OUT_B 0x24 +#define ADP5585_GPO_OUT_MODE_A 0x25 +#define ADP5585_GPO_OUT_MODE_B 0x26 +#define ADP5585_GPIO_DIRECTION_A 0x27 +#define ADP5585_GPIO_DIRECTION_B 0x28 +#define ADP5585_RESET1_EVENT_A 0x29 +#define ADP5585_RESET1_EVENT_B 0x2a +#define ADP5585_RESET1_EVENT_C 0x2b +#define ADP5585_RESET2_EVENT_A 0x2c +#define ADP5585_RESET2_EVENT_B 0x2d +#define ADP5585_RESET_CFG 0x2e +#define ADP5585_PWM_OFFT_LOW 0x2f +#define ADP5585_PWM_OFFT_HIGH 0x30 +#define ADP5585_PWM_ONT_LOW 0x31 +#define ADP5585_PWM_ONT_HIGH 0x32 +#define ADP5585_PWM_CFG 0x33 +#define ADP5585_PWM_IN_AND BIT(2) +#define ADP5585_PWM_MODE BIT(1) +#define ADP5585_PWM_EN BIT(0) +#define ADP5585_LOGIC_CFG 0x34 +#define ADP5585_LOGIC_FF_CFG 0x35 +#define ADP5585_LOGIC_INT_EVENT_EN 0x36 +#define ADP5585_POLL_PTIME_CFG 0x37 +#define ADP5585_PIN_CONFIG_A 0x38 +#define ADP5585_PIN_CONFIG_B 0x39 +#define ADP5585_PIN_CONFIG_C 0x3a +#define ADP5585_PULL_SELECT BIT(7) +#define ADP5585_C4_EXTEND_CFG_GPIO11 (0U << 6) +#define ADP5585_C4_EXTEND_CFG_RESET2 (1U << 6) +#define ADP5585_C4_EXTEND_CFG_MASK GENMASK(6, 6) +#define ADP5585_R4_EXTEND_CFG_GPIO5 (0U << 5) +#define ADP5585_R4_EXTEND_CFG_RESET1 (1U << 5) +#define ADP5585_R4_EXTEND_CFG_MASK GENMASK(5, 5) 
+#define ADP5585_R3_EXTEND_CFG_GPIO4 (0U << 2) +#define ADP5585_R3_EXTEND_CFG_LC (1U << 2) +#define ADP5585_R3_EXTEND_CFG_PWM_OUT (2U << 2) +#define ADP5585_R3_EXTEND_CFG_MASK GENMASK(3, 2) +#define ADP5585_R0_EXTEND_CFG_GPIO1 (0U << 0) +#define ADP5585_R0_EXTEND_CFG_LY (1U << 0) +#define ADP5585_R0_EXTEND_CFG_MASK GENMASK(0, 0) +#define ADP5585_GENERAL_CFG 0x3b +#define ADP5585_OSC_EN BIT(7) +#define ADP5585_OSC_FREQ_50KHZ (0U << 5) +#define ADP5585_OSC_FREQ_100KHZ (1U << 5) +#define ADP5585_OSC_FREQ_200KHZ (2U << 5) +#define ADP5585_OSC_FREQ_500KHZ (3U << 5) +#define ADP5585_OSC_FREQ_MASK GENMASK(6, 5) +#define ADP5585_INT_CFG BIT(1) +#define ADP5585_RST_CFG BIT(0) +#define ADP5585_INT_EN 0x3c + +#define ADP5585_MAX_REG ADP5585_INT_EN + +/* + * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the + * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to + * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver + * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is + * marked as reserved in the device tree for variants that don't support it. + */ +#define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) +#define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) + +struct regmap; + +struct adp5585_dev { + struct regmap *regmap; +}; + +#endif -- cgit v1.2.3 From bcbfcebda2cbc6a10a347d726e4a4f69e43a864e Mon Sep 17 00:00:00 2001 From: Mohamed Ghanmi Date: Sun, 9 Jun 2024 15:48:49 +0100 Subject: platform/x86: asus-wmi: add support for vivobook fan profiles Add support for vivobook fan profiles wmi call on the ASUS VIVOBOOK to adjust power limits. These fan profiles have a different device id than the ROG series and different order. This reorders the existing modes. As part of keeping the patch clean the throttle_thermal_policy_available boolean stored in the driver struct is removed and throttle_thermal_policy_dev is used in place (as on init it is zeroed). Co-developed-by: Luke D. Jones Signed-off-by: Luke D. Jones Signed-off-by: Mohamed Ghanmi Reviewed-by: Luke D. Jones Link: https://lore.kernel.org/r/20240609144849.2532-2-mohamed.ghanmi@supcom.tn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0aeeae1c1943..5e6f407b2def 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -68,6 +68,7 @@ #define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 +#define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO 0x00110019 /* Misc */ #define ASUS_WMI_DEVID_PANEL_OD 0x00050019 -- cgit v1.2.3 From 3908ba2e0b2476e2ec13e15967bf6a37e449f2af Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 17 Jul 2024 11:17:14 +0800 Subject: RISC-V: Enable the IPI before workqueue_online_cpu() Sometimes the hotplug cpu stalls at the arch_cpu_idle() for a while after workqueue_online_cpu(). When cpu stalls at the idle loop, the reschedule IPI is pending. However the enable bit is not enabled yet so the cpu stalls at WFI until watchdog timeout. Therefore enable the IPI before the workqueue_online_cpu() to fix the issue. 
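For reference, a STARTING-section hotplug state like the one added below is normally wired up with cpuhp_setup_state(); this is a generic sketch with placeholder callbacks, not the actual RISC-V SBI IPI driver code:

  #include <linux/cpuhotplug.h>
  #include <linux/init.h>

  static int example_sbi_ipi_starting_cpu(unsigned int cpu)
  {
          /* Enable/unmask the IPI on the incoming CPU (placeholder body). */
          return 0;
  }

  static int __init example_sbi_ipi_init(void)
  {
          /*
           * States in the STARTING section run on the hotplugged CPU with
           * interrupts disabled, before ONLINE-section callbacks such as
           * workqueue_online_cpu(), so the IPI is already enabled by the
           * time the CPU can go idle in the online phase.
           */
          return cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
                                   "irqchip/sbi-ipi:starting",
                                   example_sbi_ipi_starting_cpu, NULL);
  }
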
Fixes: 63c5484e7495 ("workqueue: Add multiple affinity scopes and interface to select them") Signed-off-by: Nick Hu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240717031714.1946036-1-nick.hu@sifive.com Signed-off-by: Palmer Dabbelt --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..e30d93b807d5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -148,6 +148,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, -- cgit v1.2.3 From 53369581dc0c68a5700ed51e1660f44c4b2bb524 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:17 -0700 Subject: drm/printer: Allow NULL data in devcoredump printer We want to determine the size of the devcoredump before writing it out. To that end, we will run the devcoredump printer with NULL data to get the size, alloc data based on the generated offset, then run the devcorecump again with a valid data pointer to print. This necessitates not writing data to the data pointer on the initial pass, when it is NULL. v5: - Better commit message (Jonathan) - Add kerenl doc with examples (Jani) Cc: Maarten Lankhorst Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-3-matthew.brost@intel.com --- include/drm/drm_print.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5d9dff5149c9..d2676831d765 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -221,7 +221,8 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) /** * struct drm_print_iterator - local struct used with drm_printer_coredump - * @data: Pointer to the devcoredump output buffer + * @data: Pointer to the devcoredump output buffer, can be NULL if using + * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ @@ -266,6 +267,57 @@ struct drm_print_iterator { * coredump_read, ...) * } * + * The above example has a time complexity of O(N^2), where N is the size of the + * devcoredump. This is acceptable for small devcoredumps but scales poorly for + * larger ones. + * + * Another use case for drm_coredump_printer is to capture the devcoredump into + * a saved buffer before the dev_coredump() callback. This involves two passes: + * one to determine the size of the devcoredump and another to print it to a + * buffer. Then, in dev_coredump(), copy from the saved buffer into the + * devcoredump read buffer. + * + * For example:: + * + * char *devcoredump_saved_buffer; + * + * ssize_t __coredump_print(char *buffer, ssize_t count, ...) + * { + * struct drm_print_iterator iter; + * struct drm_printer p; + * + * iter.data = buffer; + * iter.start = 0; + * iter.remain = count; + * + * p = drm_coredump_printer(&iter); + * + * drm_printf(p, "foo=%d\n", foo); + * ... + * return count - iter.remain; + * } + * + * void coredump_print(...) 
+ * { + * ssize_t count; + * + * count = __coredump_print(NULL, INT_MAX, ...); + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); + * __coredump_print(devcoredump_saved_buffer, count, ...); + * } + * + * void coredump_read(char *buffer, loff_t offset, size_t count, + * void *data, size_t datalen) + * { + * ... + * memcpy(buffer, devcoredump_saved_buffer + offset, count); + * ... + * } + * + * The above example has a time complexity of O(N*2), where N is the size of the + * devcoredump. This scales better than the previous example for larger + * devcoredumps. + * * RETURNS: * The &drm_printer object */ -- cgit v1.2.3 From b40824500eaa77668026b6d1ade6924901a680f9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 30 Jul 2024 14:38:07 +0900 Subject: ata: libata: Remove ata_noop_qc_prep() The function ata_noop_qc_prep(), as its name implies, does nothing and simply returns AC_ERR_OK. For drivers that do not need any special preparations of queued commands, we can avoid having to define struct ata_port qc_prep operation by simply testing if that operation is defined or not in ata_qc_issue(). Make this change and remove ata_noop_qc_prep(). Signed-off-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Sergey Shtylyov --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d598ef690e50..d5446e18d9df 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1168,7 +1168,6 @@ extern int ata_xfer_mode2shift(u8 xfer_mode); extern const char *ata_mode_string(unsigned int xfer_mask); extern unsigned int ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); -extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); -- cgit v1.2.3 From bf1807c6ee1f66608c7835f6f9d9139c9c477942 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 1 Aug 2024 18:04:22 +0900 Subject: ata: libata: Print device quirks only once In ata_dev_print_quirks(), return early if ata_dev_print_info() returns false or if we already printed quirk information. This is to avoid printing a device quirks multiple times (that is, each time ata_dev_revalidate() is called). To remember if ata_dev_print_quirks() was already executed, define the EH context flag ATA_EHI_DID_PRINT_QUIRKS and set this flag in ata_dev_print_quirks(). Reported-by: Geert Uytterhoeven Fixes: 58157d607aec ("ata: libata: Print quirks applied to devices") Signed-off-by: Damien Le Moal Tested-by: Geert Uytterhoeven --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d5446e18d9df..0279c0a6302f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -378,6 +378,7 @@ enum { ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ + ATA_EHI_DID_PRINT_QUIRKS = (1 << 21), /* already printed quirks info */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, -- cgit v1.2.3 From 744500d81f81346b4d442809c0511684bb28c829 Mon Sep 17 00:00:00 2001 From: Luigi Leonardi Date: Tue, 30 Jul 2024 21:43:06 +0200 Subject: vsock: add support for SIOCOUTQ ioctl Add support for ioctl(s) in AF_VSOCK. 
The only ioctl available is SIOCOUTQ/TIOCOUTQ, which returns the number of unsent bytes in the socket. This information is transport-specific and is delegated to them using a callback. Suggested-by: Daan De Meyer Signed-off-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/net/af_vsock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..fc504d2da3d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -169,6 +169,9 @@ struct vsock_transport { void (*notify_buffer_size)(struct vsock_sock *, u64 *); int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); + /* SIOCOUTQ ioctl */ + ssize_t (*unsent_bytes)(struct vsock_sock *vsk); + /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); -- cgit v1.2.3 From e6ab45005772014ce49a693a63f928203c0cbbb0 Mon Sep 17 00:00:00 2001 From: Luigi Leonardi Date: Tue, 30 Jul 2024 21:43:07 +0200 Subject: vsock/virtio: add SIOCOUTQ support for all virtio based transports Introduce support for virtio_transport_unsent_bytes ioctl for virtio_transport, vhost_vsock and vsock_loopback. For all transports the unsent bytes counter is incremented in virtio_transport_get_credit. In virtio_transport (G2H) and in vhost-vsock (H2G) the counter is decremented when the skbuff is consumed. In vsock_loopback the same skbuff is passed from the transmitter to the receiver, so the counter is decremented before queuing the skbuff to the receiver. Signed-off-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c82089dee0c8..0387d64e2c66 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -133,6 +133,7 @@ struct virtio_vsock_sock { u32 tx_cnt; u32 peer_fwd_cnt; u32 peer_buf_alloc; + size_t bytes_unsent; /* Protected by rx_lock */ u32 fwd_cnt; @@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk); +ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk); + +void virtio_transport_consume_skb_sent(struct sk_buff *skb, + bool consume); + int virtio_transport_do_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk); int -- cgit v1.2.3 From 042859e80d4b67ff93f19009c02d6a8a735b0fd9 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 29 Jul 2024 21:26:43 +0100 Subject: dt-bindings: clock: renesas: Document RZ/V2H(P) SoC CPG Document the device tree bindings for the Renesas RZ/V2H(P) SoC Clock Pulse Generator (CPG). CPG block handles the below operations: - Generation and control of clock signals for the IP modules - Generation and control of resets - Control over booting - Low power consumption and power supply domains Also define constants for the core clocks of the RZ/V2H(P) SoC. Note the core clocks are a subset of the ones which are listed as part of section 4.4.2 of HW manual Rev.1.01 which cannot be controlled by CLKON register. 
Signed-off-by: Lad Prabhakar Reviewed-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20240729202645.263525-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g057-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h new file mode 100644 index 000000000000..541e6d719bd6 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G057_SYS_0_PCLK 0 +#define R9A09G057_CA55_0_CORE_CLK0 1 +#define R9A09G057_CA55_0_CORE_CLK1 2 +#define R9A09G057_CA55_0_CORE_CLK2 3 +#define R9A09G057_CA55_0_CORE_CLK3 4 +#define R9A09G057_CA55_0_PERIPHCLK 5 +#define R9A09G057_CM33_CLK0 6 +#define R9A09G057_CST_0_SWCLKTCK 7 +#define R9A09G057_IOTOP_0_SHCLK 8 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 79bd233010859463e46a0a4b3926eaaba25a6110 Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Tue, 30 Jul 2024 09:44:14 +0100 Subject: perf: Rename perf_event_context.nr_pending to nr_no_switch_fast. nr_pending counts the number of events in the context that either pending_sigtrap or pending_work, but it is used to prevent taking the fast path in perf_event_context_sched_out. Renamed to reflect what it is used for, rather than what it counts. This change allows using the field to track other event properties that also require skipping the fast path without possible confusion over the name. Signed-off-by: Ben Gainey Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240730084417.7693-2-ben.gainey@arm.com --- include/linux/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6bb0c21d6335..655f66b18418 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -966,12 +966,13 @@ struct perf_event_context { struct rcu_head rcu_head; /* - * Sum (event->pending_work + event->pending_work) + * The count of events for which using the switch-out fast path + * should be avoided. * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ - local_t nr_pending; + local_t nr_no_switch_fast; }; struct perf_cpu_pmu_context { -- cgit v1.2.3 From 7e8b255650fcfa1d05a57e4093d8405e6d8dd488 Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Tue, 30 Jul 2024 09:44:15 +0100 Subject: perf: Support PERF_SAMPLE_READ with inherit This change allows events to use PERF_SAMPLE_READ with inherit so long as PERF_SAMPLE_TID is also set. This enables sample based profiling of a group of counters over a hierarchy of processes or threads. This is useful, for example, for collecting per-thread counters/metrics, event based sampling of multiple counters as a unit, access to the enabled and running time when using multiplexing and so on. Prior to this, users were restricted to either collecting aggregate statistics for a multi-threaded/-process application (e.g. 
with "perf stat"), or to sample individual threads, or to profile the entire system (which requires root or CAP_PERFMON, and may produce much more data than is required). Theoretically a tool could poll for or otherwise monitor thread/process creation and construct whatever events the user is interested in using perf_event_open, for each new thread or process, but this is racy, can lead to file-descriptor exhaustion, and ultimately just replicates the behaviour of inherit, but in userspace. This configuration differs from inherit without PERF_SAMPLE_READ in that the accumulated event count, and consequently any sample (such as if triggered by overflow of sample_period) will be on a per-thread rather than on an aggregate basis. The meaning of read_format::value field of both PERF_RECORD_READ and PERF_RECORD_SAMPLE is changed such that if the sampled event uses this new configuration then the values reported will be per-thread rather than the global aggregate value. This is a change from the existing semantics of read_format (where PERF_SAMPLE_READ is used without inherit), but it is necessary to expose the per-thread counter values, and it avoids reinventing a separate "read_format_thread" field that otherwise replicates the same behaviour. This change should not break existing tools, since this configuration was not previously valid and was rejected by the kernel. Tools that opt into this new mode will need to account for this when calculating the counter delta for a given sample. Tools that wish to have both the per-thread and aggregate value can perform the global aggregation themselves from the per-thread values. The change to read_format::value does not affect existing valid perf_event_attr configurations, nor does it change the behaviour of calls to "read" on an event descriptor. Both continue to report the aggregate value for the entire thread/process hierarchy. The difference between the results reported by "read" and PERF_RECORD_SAMPLE in this new configuration is justified on the basis that it is not (easily) possible for "read" to target a specific thread (the caller only has the fd for the original parent event). Signed-off-by: Ben Gainey Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240730084417.7693-3-ben.gainey@arm.com --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 655f66b18418..701549967c18 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -969,6 +969,9 @@ struct perf_event_context { * The count of events for which using the switch-out fast path * should be avoided. * + * Sum (event->pending_work + events with + * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))) + * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ -- cgit v1.2.3 From cfa7f3d2c526c224a6271cc78a4a27a0de06f4f0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jul 2024 10:52:23 -0700 Subject: perf,x86: avoid missing caller address in stack traces captured in uprobe When tracing user functions with uprobe functionality, it's common to install the probe (e.g., a BPF program) at the first instruction of the function. This is often going to be `push %rbp` instruction in function preamble, which means that within that function frame pointer hasn't been established yet. 
This leads to consistently missing an actual caller of the traced function, because perf_callchain_user() only records current IP (capturing traced function) and then following frame pointer chain (which would be caller's frame, containing the address of caller's caller). So when we have target_1 -> target_2 -> target_3 call chain and we are tracing an entry to target_3, captured stack trace will report target_1 -> target_3 call chain, which is wrong and confusing. This patch proposes a x86-64-specific heuristic to detect `push %rbp` (`push %ebp` on 32-bit architecture) instruction being traced. Given entire kernel implementation of user space stack trace capturing works under assumption that user space code was compiled with frame pointer register (%rbp/%ebp) preservation, it seems pretty reasonable to use this instruction as a strong indicator that this is the entry to the function. In that case, return address is still pointed to by %rsp/%esp, so we fetch it and add to stack trace before proceeding to unwind the rest using frame pointer-based logic. We also check for `endbr64` (for 64-bit modes) as another common pattern for function entry, as suggested by Josh Poimboeuf. Even if we get this wrong sometimes for uprobes attached not at the function entry, it's OK because stack trace will still be overall meaningful, just with one extra bogus entry. If we don't detect this, we end up with guaranteed to be missing caller function entry in the stack trace, which is worse overall. Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240729175223.23914-1-andrii@kernel.org --- include/linux/uprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b503fafb7fb3..a270a5892ab4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -76,6 +76,8 @@ struct uprobe_task { struct uprobe *active_uprobe; unsigned long xol_vaddr; + struct arch_uprobe *auprobe; + struct return_instance *return_instances; unsigned int depth; }; -- cgit v1.2.3 From e04332ebc8ac128fa551e83f1161ab1c094d13a9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 1 Aug 2024 15:27:28 +0200 Subject: uprobes: kill uprobe_register_refctr() It doesn't make any sense to have 2 versions of _register(). Note that trace_uprobe_enable(), the only user of uprobe_register(), doesn't need to check tu->ref_ctr_offset to decide which one should be used, it could safely pass ref_ctr_offset == 0 to uprobe_register_refctr(). Add this argument to uprobe_register(), update the callers, and kill uprobe_register_refctr(). 
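To make the call-site change concrete, a hedged before/after sketch (the variables are invented; the four-argument prototype is the one added in the hunk below):

  #include <linux/uprobes.h>

  /*
   * Before: two entry points, e.g.
   *     err = uprobe_register(inode, offset, uc);
   *     err = uprobe_register_refctr(inode, offset, ref_ctr_offset, uc);
   * After: a single call; pass ref_ctr_offset == 0 when there is no
   * reference counter.
   */
  static int example_attach(struct inode *inode, loff_t offset,
                            loff_t ref_ctr_offset, struct uprobe_consumer *uc)
  {
          return uprobe_register(inode, offset, ref_ctr_offset, uc);
  }
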
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240801132728.GA8800@redhat.com --- include/linux/uprobes.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index a270a5892ab4..788813c0b8fc 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -112,8 +112,7 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); -extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); @@ -154,11 +153,7 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) static inline int -uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) -{ - return -ENOSYS; -} -static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) +uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { return -ENOSYS; } -- cgit v1.2.3 From 3c83a9ad0295eb63bdeb81d821b8c3b9417fbcac Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 1 Aug 2024 15:27:34 +0200 Subject: uprobes: make uprobe_register() return struct uprobe * This way uprobe_unregister() and uprobe_apply() can use "struct uprobe *" rather than inode + offset. This simplifies the code and allows to avoid the unnecessary find_uprobe() + put_uprobe() in these functions. TODO: uprobe_unregister() still needs get_uprobe/put_uprobe to ensure that this uprobe can't be freed before up_write(&uprobe->register_rwsem). 
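A sketch of what a caller looks like with the handle-based API (illustrative only; the stored handle and function names are invented for the example):

  #include <linux/err.h>
  #include <linux/uprobes.h>

  static struct uprobe *example_uprobe;

  static int example_attach(struct inode *inode, loff_t offset,
                            struct uprobe_consumer *uc)
  {
          struct uprobe *u;

          /* Registration now hands back the uprobe itself. */
          u = uprobe_register(inode, offset, 0, uc);
          if (IS_ERR(u))
                  return PTR_ERR(u);

          example_uprobe = u;
          return 0;
  }

  static void example_detach(struct uprobe_consumer *uc)
  {
          /* ...and unregister/apply take that handle instead of inode + offset. */
          uprobe_unregister(example_uprobe, uc);
  }
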
Co-developed-by: Andrii Nakryiko Signed-off-by: Andrii Nakryiko Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20240801132734.GA8803@redhat.com --- include/linux/uprobes.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 788813c0b8fc..f50df1fa93e7 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include #include +struct uprobe; struct vm_area_struct; struct mm_struct; struct inode; @@ -112,9 +113,9 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); -extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); +extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -152,18 +153,18 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) -static inline int +static inline struct uprobe * uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { - return -ENOSYS; + return ERR_PTR(-ENOSYS); } static inline int -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) { return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { } static inline int uprobe_mmap(struct vm_area_struct *vma) -- cgit v1.2.3 From 6edb8cd87cca5dd4389d4ea2d84b66ea94341bbd Mon Sep 17 00:00:00 2001 From: Kerem Karabay Date: Fri, 5 Jul 2024 11:17:42 +0000 Subject: HID: core: add helper for finding a field with a certain usage This helper will allow HID drivers to easily determine if they should bind to a hid_device by checking for the prescence of a certain field when its ID is not enough, which can be the case on USB devices with multiple interfaces and/or configurations. Convert google-hammer driver to use it, and remove now superfluous hammer_has_usage(). 
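A rough sketch of the intended use in a driver probe path, based on the prototype added below; the application and usage values are stand-ins rather than the google-hammer ones:

  #include <linux/hid.h>

  static int example_probe(struct hid_device *hdev, const struct hid_device_id *id)
  {
          int ret;

          ret = hid_parse(hdev);
          if (ret)
                  return ret;

          /*
           * Bind only if this interface actually exposes the field we need
           * (0x00ff0001 is a placeholder vendor-defined usage).
           */
          if (!hid_find_field(hdev, HID_INPUT_REPORT, HID_GD_KEYBOARD, 0x00ff0001))
                  return -ENODEV;

          return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
  }
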
[jkosina@suse.com: expand changelog with the information about google-hammer being added as user of this API ] Signed-off-by: Kerem Karabay Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a6..2deff79f39a1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -940,6 +940,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); +struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, + unsigned int application, unsigned int usage); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); -- cgit v1.2.3 From 79f194dd54cef3a0212914cd79dda1f91b7bf7f6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jul 2024 18:11:29 +0200 Subject: thermal: trip: Get rid of thermal_zone_get_num_trips() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only existing caller of thermal_zone_get_num_trips(), which is rcar_gen3_thermal_probe(), uses this function to put the number of trip points into a kernel log message, but this information is also available from the thermal sysfs interface. For this reason, remove the thermal_zone_get_num_trips() call from rcar_gen3_thermal_probe() and drop the former altogether. Signed-off-by: Rafael J. Wysocki Reviewed-by: Niklas Söderlund Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/2636988.Lt9SDvczpP@rjwysocki.net --- include/linux/thermal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..4fdfd5abd2c4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -210,7 +210,6 @@ int for_each_thermal_trip(struct thermal_zone_device *tz, int thermal_zone_for_each_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); -int thermal_zone_get_num_trips(struct thermal_zone_device *tz); void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, struct thermal_trip *trip, int temp); -- cgit v1.2.3 From 8ecd953ca5850d2f83f548d4bbcf672d2d2cd0ca Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jul 2024 18:12:45 +0200 Subject: thermal: trip: Drop thermal_zone_get_trip() There are no more callers of thermal_zone_get_trip() in the tree, so drop it. No functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/2220301.Mh6RI2rZIc@rjwysocki.net --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 4fdfd5abd2c4..db44a57a7474 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -202,8 +202,6 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif -int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, - struct thermal_trip *trip); int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); -- cgit v1.2.3 From c8a132e2e032b00828d51141ab34f9aeb24f44ae Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Aug 2024 11:57:32 +0100 Subject: ASoC: soc-component: Add new snd_soc_component_get_kcontrol() helpers Add new helper functions snd_soc_component_get_kcontrol() and snd_soc_component_get_kcontrol_locked() that returns a kcontrol by name, but will factor in the components name_prefix, to handle situations where multiple components are present with the same controls. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20240802105734.2309788-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a82..bf2e381cd124 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); -- cgit v1.2.3 From 6a965fbaac461564ae74dbfe6d9c9e9de65ea67a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 2 Aug 2024 14:40:07 +0200 Subject: ASoC: Intel: soc-acpi: add PTL match tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now the tables are basic for mockup devices and headset codec support Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20240802124011.173820-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi-intel-match.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h index 4843b57798f6..daed7123df9d 100644 --- a/include/sound/soc-acpi-intel-match.h +++ b/include/sound/soc-acpi-intel-match.h @@ -33,6 +33,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_machines[]; +extern struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cfl_sdw_machines[]; @@ -44,6 +45,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_sdw_machines[]; extern struct 
snd_soc_acpi_mach snd_soc_acpi_intel_mtl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_lnl_sdw_machines[]; extern struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[]; +extern struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[]; /* * generic table used for HDA codec-based platforms, possibly with -- cgit v1.2.3 From 92b796845a4a8789c2d9434c6a77baa88a99121e Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 2 Aug 2024 15:20:52 +0800 Subject: ASoC: tas2781: Fix a compiling warning reported by robot kernel test due to adding tas2563_dvc_table Move tas2563_dvc_table into a separate Header file, as only tas2781 codec driver use this table, and hda side codec driver won't use it. Fixes: 75ed63a5ab5d ("ASoC: tas2781: Add new Kontrol to set tas2563 digital Volume") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240802072055.1462-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2563-tlv.h | 279 ++++++++++++++++++++++++++++++++++++++++++++ include/sound/tas2781-tlv.h | 260 ----------------------------------------- 2 files changed, 279 insertions(+), 260 deletions(-) create mode 100644 include/sound/tas2563-tlv.h (limited to 'include') diff --git a/include/sound/tas2563-tlv.h b/include/sound/tas2563-tlv.h new file mode 100644 index 000000000000..faa3e194f73b --- /dev/null +++ b/include/sound/tas2563-tlv.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS2563 Audio Smart Amplifier +// +// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS2563 driver implements a flexible and configurable +// algo coefficient setting for one, two, or even multiple +// TAS2563 chips. +// +// Author: Shenghao Ding +// + +#ifndef __TAS2563_TLV_H__ +#define __TAS2563_TLV_H__ + +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); + +/* pow(10, db/20) * pow(2,30) */ +static const unsigned char tas2563_dvc_table[][4] = { + { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ + { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ + { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ + { 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */ + { 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */ + { 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */ + { 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */ + { 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */ + { 0X00, 0X00, 0X05, 0X97 }, /* -117.5db */ + { 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */ + { 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */ + { 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */ + { 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */ + { 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */ + { 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */ + { 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */ + { 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */ + { 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */ + { 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */ + { 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */ + { 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */ + { 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */ + { 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */ + { 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */ + { 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */ + { 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */ + { 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */ + { 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */ + { 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */ + { 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */ + { 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */ + { 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */ + { 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */ + { 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */ + { 
0X00, 0X00, 0X18, 0XFB }, /* -104.5db */ + { 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */ + { 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */ + { 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */ + { 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */ + { 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */ + { 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */ + { 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */ + { 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */ + { 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */ + { 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */ + { 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */ + { 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */ + { 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */ + { 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */ + { 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */ + { 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */ + { 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */ + { 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */ + { 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */ + { 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */ + { 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */ + { 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */ + { 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */ + { 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */ + { 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */ + { 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */ + { 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */ + { 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */ + { 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */ + { 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */ + { 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */ + { 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */ + { 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */ + { 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */ + { 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */ + { 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */ + { 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */ + { 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */ + { 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */ + { 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */ + { 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */ + { 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */ + { 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */ + { 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */ + { 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */ + { 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */ + { 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */ + { 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */ + { 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */ + { 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */ + { 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */ + { 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */ + { 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */ + { 0X00, 0X02, 0X2F, 0X51 }, /* -77.5db */ + { 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */ + { 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */ + { 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */ + { 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */ + { 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */ + { 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */ + { 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */ + { 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */ + { 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */ + { 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */ + { 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */ + { 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */ + { 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */ + { 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */ + { 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */ + { 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */ + { 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */ + { 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */ + { 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */ + { 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */ + { 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */ + { 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */ + { 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */ + { 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */ + { 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */ + { 0X00, 0X09, 0XC2, 0X63 
}, /* -64.5db */ + { 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */ + { 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */ + { 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */ + { 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */ + { 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */ + { 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */ + { 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */ + { 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */ + { 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */ + { 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */ + { 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */ + { 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */ + { 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */ + { 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */ + { 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */ + { 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */ + { 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */ + { 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */ + { 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */ + { 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */ + { 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */ + { 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */ + { 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */ + { 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */ + { 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */ + { 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */ + { 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */ + { 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */ + { 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */ + { 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */ + { 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */ + { 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */ + { 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */ + { 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */ + { 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */ + { 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */ + { 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */ + { 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */ + { 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */ + { 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */ + { 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */ + { 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */ + { 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */ + { 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */ + { 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */ + { 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */ + { 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */ + { 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */ + { 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */ + { 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */ + { 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */ + { 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */ + { 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */ + { 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */ + { 0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */ + { 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */ + { 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */ + { 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */ + { 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */ + { 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */ + { 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */ + { 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */ + { 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */ + { 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */ + { 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */ + { 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */ + { 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */ + { 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */ + { 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */ + { 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */ + { 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */ + { 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */ + { 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */ + { 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */ + { 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */ + { 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */ + { 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */ + { 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */ + { 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */ + { 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */ + { 0X04, 0X09, 
0XC2, 0XB0 }, /* -24.0db */ + { 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */ + { 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */ + { 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */ + { 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */ + { 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */ + { 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */ + { 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */ + { 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */ + { 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */ + { 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */ + { 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */ + { 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */ + { 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */ + { 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */ + { 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */ + { 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */ + { 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */ + { 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */ + { 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */ + { 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */ + { 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */ + { 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */ + { 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */ + { 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */ + { 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */ + { 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */ + { 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */ + { 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */ + { 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */ + { 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */ + { 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */ + { 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */ + { 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */ + { 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */ + { 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */ + { 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */ + { 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */ + { 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */ + { 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */ + { 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */ + { 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */ + { 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */ + { 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */ + { 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */ + { 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */ + { 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */ + { 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */ + { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ + { 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */ + { 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */ + { 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */ + { 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */ + { 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */ + { 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */ + { 0X5F, 0XC2, 0X53, 0X32 }, /* 3.5db */ + { 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */ + { 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */ + { 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */ + { 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */ + { 0X7F, 0XFF, 0XFF, 0XFF }, /* 6.0db */ +}; +#endif diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 00fd4d449ff3..d87263e43fdb 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -17,265 +17,5 @@ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); -static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); -/* pow(10, db/20) * pow(2,30) */ -static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { - { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ - { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ - { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ - { 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */ - { 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */ - { 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */ - { 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */ - { 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */ - { 0X00, 0X00, 0X05, 
0X97 }, /* -117.5db */ - { 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */ - { 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */ - { 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */ - { 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */ - { 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */ - { 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */ - { 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */ - { 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */ - { 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */ - { 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */ - { 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */ - { 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */ - { 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */ - { 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */ - { 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */ - { 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */ - { 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */ - { 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */ - { 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */ - { 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */ - { 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */ - { 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */ - { 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */ - { 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */ - { 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */ - { 0X00, 0X00, 0X18, 0XFB }, /* -104.5db */ - { 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */ - { 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */ - { 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */ - { 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */ - { 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */ - { 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */ - { 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */ - { 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */ - { 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */ - { 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */ - { 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */ - { 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */ - { 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */ - { 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */ - { 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */ - { 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */ - { 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */ - { 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */ - { 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */ - { 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */ - { 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */ - { 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */ - { 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */ - { 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */ - { 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */ - { 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */ - { 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */ - { 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */ - { 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */ - { 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */ - { 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */ - { 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */ - { 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */ - { 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */ - { 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */ - { 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */ - { 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */ - { 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */ - { 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */ - { 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */ - { 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */ - { 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */ - { 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */ - { 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */ - { 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */ - { 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */ - { 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */ - { 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */ - { 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */ - { 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */ - { 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */ - { 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */ - { 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */ - { 0X00, 0X02, 
0X2F, 0X51 }, /* -77.5db */ - { 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */ - { 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */ - { 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */ - { 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */ - { 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */ - { 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */ - { 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */ - { 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */ - { 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */ - { 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */ - { 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */ - { 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */ - { 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */ - { 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */ - { 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */ - { 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */ - { 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */ - { 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */ - { 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */ - { 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */ - { 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */ - { 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */ - { 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */ - { 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */ - { 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */ - { 0X00, 0X09, 0XC2, 0X63 }, /* -64.5db */ - { 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */ - { 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */ - { 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */ - { 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */ - { 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */ - { 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */ - { 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */ - { 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */ - { 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */ - { 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */ - { 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */ - { 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */ - { 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */ - { 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */ - { 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */ - { 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */ - { 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */ - { 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */ - { 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */ - { 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */ - { 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */ - { 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */ - { 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */ - { 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */ - { 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */ - { 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */ - { 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */ - { 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */ - { 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */ - { 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */ - { 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */ - { 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */ - { 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */ - { 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */ - { 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */ - { 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */ - { 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */ - { 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */ - { 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */ - { 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */ - { 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */ - { 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */ - { 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */ - { 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */ - { 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */ - { 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */ - { 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */ - { 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */ - { 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */ - { 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */ - { 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */ - { 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */ - { 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */ - { 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */ - { 
0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */ - { 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */ - { 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */ - { 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */ - { 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */ - { 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */ - { 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */ - { 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */ - { 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */ - { 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */ - { 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */ - { 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */ - { 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */ - { 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */ - { 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */ - { 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */ - { 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */ - { 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */ - { 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */ - { 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */ - { 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */ - { 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */ - { 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */ - { 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */ - { 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */ - { 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */ - { 0X04, 0X09, 0XC2, 0XB0 }, /* -24.0db */ - { 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */ - { 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */ - { 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */ - { 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */ - { 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */ - { 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */ - { 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */ - { 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */ - { 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */ - { 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */ - { 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */ - { 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */ - { 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */ - { 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */ - { 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */ - { 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */ - { 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */ - { 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */ - { 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */ - { 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */ - { 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */ - { 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */ - { 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */ - { 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */ - { 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */ - { 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */ - { 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */ - { 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */ - { 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */ - { 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */ - { 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */ - { 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */ - { 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */ - { 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */ - { 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */ - { 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */ - { 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */ - { 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */ - { 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */ - { 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */ - { 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */ - { 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */ - { 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */ - { 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */ - { 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */ - { 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */ - { 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */ - { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ - { 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */ - { 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */ - { 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */ - { 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */ - { 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */ - { 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */ - { 0X5F, 0XC2, 0X53, 0X32 
}, /* 3.5db */ - { 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */ - { 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */ - { 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */ - { 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */ - { 0XFF, 0XFF, 0XFF, 0XFF }, /* 6.0db */ -}; #endif -- cgit v1.2.3 From 54233a4254036efca91b9bffbd398ecf39e90555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Jul 2024 17:30:40 +0200 Subject: uretprobe: change syscall number, again Despite multiple attempts to get the syscall number assignment right for the newly added uretprobe syscall, we ended up with a bit of a mess: - The number is defined as 467 based on the assumption that the xattrat family of syscalls would use 463 through 466, but those did not make it into 6.11. - The include/uapi/asm-generic/unistd.h file still lists the number 463, but the new scripts/syscall.tbl that was supposed to have the same data lists 467 instead as the number for arc, arm64, csky, hexagon, loongarch, nios2, openrisc and riscv. None of these architectures actually provide a uretprobe syscall. - All the other architectures (powerpc, arm, mips, ...) don't list this syscall at all. There are two ways to make it consistent again: either list it with the same syscall number on all architectures, or only list it on x86 but not in scripts/syscall.tbl and asm-generic/unistd.h. Based on the most recent discussion, it seems like we won't need it anywhere else, so just remove the inconsistent assignment and instead move the x86 number to the next available one in the architecture specific range, which is 335. Fixes: 5c28424e9a34 ("syscalls: Fix to add sys_uretprobe to syscall.tbl") Fixes: 190fec72df4a ("uprobe: Wire up uretprobe system call") Fixes: 63ded110979b ("uprobe: Change uretprobe syscall scope and number") Acked-by: Masami Hiramatsu (Google) Reviewed-by: Jiri Olsa Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9e..5bf6148cac2b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From 37c6dccd683797a5a4ec85d2e94939bf829d3499 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 28 Jul 2024 20:26:59 +0100 Subject: cpufreq: Remove LATENCY_MULTIPLIER The current LATENCY_MULTIPLIER which has been around for nearly 20 years causes rate_limit_us to be always in ms range. On M1 mac mini I get 50 and 56us transition latency, but due to the 1000 multiplier we end up setting rate_limit_us to 50 and 56ms, which gets capped into 2ms and was 10ms before e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms") On Intel I5 system transition latency is 20us but due to the multiplier we end up with 20ms that again is capped to 2ms. Given how good modern hardware and how modern workloads require systems to be more responsive to cater for sudden changes in workload (tasks sleeping/wakeup/migrating, uclamp causing a sudden boost or cap) and that 2ms is quarter of the time of 120Hz refresh rate system, drop the old logic in favour of providing 50% headroom. rate_limit_us = 1.5 * latency. 
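As a rough sketch of the new behaviour (the function name below is illustrative, not the actual cpufreq code):

	static unsigned int example_transition_delay_us(unsigned int latency_us)
	{
		if (!latency_us)
			return 1000;	/* keep the 1 ms default for 0-latency hardware */
		return latency_us + latency_us / 2;	/* 50% headroom */
	}

With this, the 50 us latency reported on the M1 mac mini maps to a 75 us rate limit instead of the 50 ms computed with the old multiplier (before capping).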
I considered not adding any headroom which could mean that we can end up with infinite back-to-back requests. I also considered providing a constant headroom (e.g: 100us) assuming that any h/w or f/w dealing with the request shouldn't require a large headroom when transition_latency is actually high. But for both cases I wasn't sure if h/w or f/w can end up being overwhelmed dealing with the freq requests in a potentially busy system. So I opted for providing 50% breathing room. This is expected to impact schedutil only as the other user, dbs_governor, takes the max(2*tick, transition_delay_us) and the former was at least 2ms on 1ms TICK, which is equivalent to the max_delay_us before applying this patch. For systems with TICK of 4ms, this value would have almost always ended up with 8ms sampling rate. For systems that report 0 transition latency, we still default to returning 1ms as transition delay. This helps in eliminating a source of latency for applying requests as mentioned in [1]. For example if we have a 1ms tick, most systems will miss sending an update at tick when updating the util_avg for a task/CPU (rate_limit_us will be 2ms for most systems). Link: https://lore.kernel.org/lkml/20240724212255.mfr2ybiv2j2uqek7@airbuntu/ # [1] Link: https://lore.kernel.org/lkml/20240205022500.2232124-1-qyousef@layalina.io/ Signed-off-by: Qais Yousef Link: https://patch.msgid.link/20240728192659.58115-1-qyousef@layalina.io [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d4d2f4d1d7cb..e0e19d9c1323 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. - */ -#define LATENCY_MULTIPLIER (1000) - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*init)(struct cpufreq_policy *policy); -- cgit v1.2.3 From 52831d9bbc9aa029aed525dfbe61cb78c1db991c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 9 Jul 2024 22:37:25 +0200 Subject: ACPI: sysfs: evaluate _STR on each sysfs access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The handling of the _STR method is inconsistent with the other method evaluations. It is the only method which is cached. The cached value stored in 'struct acpi_device_pnp' has a different lifetime than the other struct members. Commit d1efe3c324ea ("ACPI: Add new sysfs interface to export device description") does not explain this difference. Evaluating the method every time also removes the necessity to manage the lifetime of the cached value, which would be a problem when managing the sysfs attributes through the device core. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240709-acpi-sysfs-groups-v2-2-058ab0667fa8@weissschuh.net Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpi_bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..d0ec808e2c42 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -255,7 +255,6 @@ struct acpi_device_pnp { struct list_head ids; /* _HID and _CIDs */ acpi_device_name device_name; /* Driver-determined */ acpi_device_class device_class; /* " */ - union acpi_object *str_obj; /* unicode string for _STR method */ }; #define acpi_device_bid(d) ((d)->pnp.bus_id) -- cgit v1.2.3 From b6b242d019ed23195c81cf00eb8290d386efb83f Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 2 Aug 2024 10:59:45 -0400 Subject: Revert "drm: Introduce 'power saving policy' drm property" This reverts commit 76299a557f36d624ca32500173ad7856e1ad93c0. It was merged without meeting userspace requirements. Signed-off-by: Hamza Mahfooz Reviewed-by: Harry Wentland Link: https://patchwork.freedesktop.org/patch/msgid/20240802145946.48073-1-hamza.mahfooz@amd.com --- include/drm/drm_connector.h | 2 -- include/drm/drm_mode_config.h | 5 ----- include/uapi/drm/drm_mode.h | 7 ------- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5ad735253413..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,8 +2267,6 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); -int drm_mode_create_power_saving_policy_property(struct drm_device *dev, - uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 150f9a3b649f..ab0f167474b1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,11 +969,6 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; - /** - * @power_saving_policy: bitmask for power saving policy requests. - */ - struct drm_property *power_saving_policy; - const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 880303c2ad97..d390011b89b4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,13 +152,6 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ -/* power saving policy options */ -#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ -#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ - -#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ - DRM_MODE_REQUIRE_LOW_LATENCY) - /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.2.3 From 70e6b7d9ae3c63df90a7bba7700e8d5c300c3c60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 2 Aug 2024 14:55:54 +0100 Subject: x86/i8253: Disable PIT timer 0 when not in use Leaving the PIT interrupt running can cause noticeable steal time for virtual guests. The VMM generally has a timer which toggles the IRQ input to the PIC and I/O APIC, which takes CPU time away from the guest. 
Even on real hardware, running the counter may use power needlessly (albeit not much). Make sure it's turned off if it isn't going to be used. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Link: https://lore.kernel.org/all/20240802135555.564941-1-dwmw2@infradead.org --- include/linux/i8253.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i8253.h b/include/linux/i8253.h index 8336b2f6f834..bf169cfef7f1 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -24,6 +24,7 @@ extern raw_spinlock_t i8253_lock; extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); +extern void clockevent_i8253_disable(void); extern void setup_pit_timer(void); -- cgit v1.2.3 From 531b2ca0a940ac9db03f246c8b77c4201de72b00 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 2 Aug 2024 14:55:55 +0100 Subject: clockevents/drivers/i8253: Fix stop sequence for timer 0 According to the data sheet, writing the MODE register should stop the counter (and thus the interrupts). This appears to work on real hardware, at least modern Intel and AMD systems. It should also work on Hyper-V. However, on some buggy virtual machines the mode change doesn't have any effect until the counter is subsequently loaded (or perhaps when the IRQ next fires). So, set MODE 0 and then load the counter, to ensure that those buggy VMs do the right thing and the interrupts stop. And then write MODE 0 *again* to stop the counter on compliant implementations too. Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero when it should only happen once, but the second MODE write stops that too. Userspace test program (mostly written by tglx): ===== #include <stdint.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <sys/io.h> #define BUILDIO(bwl, bw, type) \ static __always_inline void __out##bwl(type value, uint16_t port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(uint16_t port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, uint8_t) #define inb __inb #define outb __outb #define PIT_MODE 0x43 #define PIT_CH0 0x40 #define PIT_CH2 0x42 static int is8254; static void dump_pit(void) { if (is8254) { // Latch and output counter and status outb(0xC2, PIT_MODE); printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); } else { // Latch and output counter outb(0x0, PIT_MODE); printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); } } int main(int argc, char* argv[]) { int nr_counts = 2; if (argc > 1) nr_counts = atoi(argv[1]); if (argc > 2) is8254 = 1; if (ioperm(0x40, 4, 1) != 0) return 1; dump_pit(); printf("Set oneshot\n"); outb(0x38, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); printf("Set periodic\n"); outb(0x34, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set stop (%d counter writes)\n", nr_counts); outb(0x30, PIT_MODE); while (nr_counts--) outb(0xFF, PIT_CH0); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set MODE 0\n"); outb(0x30, PIT_MODE); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); return 0; } ===== Suggested-by: Sean Christopherson Co-developed-by: Li RongQing Signed-off-by: Li RongQing Signed-off-by: David Woodhouse
Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org --- include/linux/i8253.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/i8253.h b/include/linux/i8253.h index bf169cfef7f1..56c280eb2d4f 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,7 +21,6 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; -extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); extern void clockevent_i8253_disable(void); -- cgit v1.2.3 From 90ec3a8a7fd0d43026fcca979713e077d4883b56 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:13 +0100 Subject: spi: Add empty versions of ACPI functions Provide empty versions of acpi_spi_count_resources(), acpi_spi_device_alloc() and acpi_spi_find_controller_by_adev() if the real functions are not being built. This commit fixes two problems with the original definitions: 1) There wasn't an empty version of these functions 2) The #if only depended on CONFIG_ACPI. But the functions are implemented in the core spi.c so CONFIG_SPI_MASTER must also be enabled for the real functions to exist. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240802152215.20831-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a03..d47d5f14ff99 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* -- cgit v1.2.3 From e99129e5dbf7ca87233d31ad19348f6ce8627b38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 1 Aug 2024 13:32:59 -1000 Subject: sched_ext: Allow p->scx.disallow only while loading From 1232da7eced620537a78f19c8cf3d4a3508e2419 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 31 Jul 2024 09:14:52 -1000 p->scx.disallow provides a way for the BPF scheduler to reject certain tasks from attaching. It's currently allowed for both the load and fork paths; however, the latter doesn't actually work as p->sched_class is already set by the time scx_ops_init_task() is called during fork. This is a convenience feature which is mostly useful from the load path anyway. Allow it only from the load path. v2: Trigger scx_ops_error() iff @p->policy == SCHED_EXT to make it a bit easier for the BPF scheduler (David). 
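As an illustration only (the callback and helper names below follow common sched_ext BPF conventions and are assumptions, not part of this patch), a loader-time rejection could look like:

	s32 BPF_STRUCT_OPS(example_init_task, struct task_struct *p,
			   struct scx_init_task_args *args)
	{
		/* honoured only while the scheduler is loading, not at fork */
		if (!args->fork && example_task_is_unsupported(p))
			p->scx.disallow = true;
		return 0;
	}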
Signed-off-by: Tejun Heo Reported-by: "Zhangqiao (2012 lab)" Link: http://lkml.kernel.org/r/20240711110720.1285-1-zhangqiao22@huawei.com Fixes: 7bb6f0810ecf ("sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT") Acked-by: David Vernet Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 26e1c33bc844..69f68e2121a8 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -179,11 +179,12 @@ struct sched_ext_entity { * If set, reject future sched_setscheduler(2) calls updating the policy * to %SCHED_EXT with -%EACCES. * - * If set from ops.init_task() and the task's policy is already - * %SCHED_EXT, which can happen while the BPF scheduler is being loaded - * or by inhering the parent's policy during fork, the task's policy is - * rejected and forcefully reverted to %SCHED_NORMAL. The number of - * such events are reported through /sys/kernel/debug/sched_ext::nr_rejected. + * Can be set from ops.init_task() while the BPF scheduler is being + * loaded (!scx_init_task_args->fork). If set and the task's policy is + * already %SCHED_EXT, the task's policy is rejected and forcefully + * reverted to %SCHED_NORMAL. The number of such events are reported + * through /sys/kernel/debug/sched_ext::nr_rejected. Setting this flag + * during fork is not allowed. */ bool disallow; /* reject switching into SCX */ -- cgit v1.2.3 From 49675f5bdf9ae2624b430dafda4cb29024521625 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Aug 2024 09:34:01 -0700 Subject: net: remove IFF_* re-definition We re-define values of enum netdev_priv_flags as preprocessor macros with the same name. I guess this was done to avoid breaking out of tree modules which may use #ifdef X for kernel compatibility? Commit 7aa98047df95 ("net: move net_device priv_flags out from UAPI") which added the enum doesn't say. In any case, the flags with defines are quite old now, and defines for new flags don't get added. OOT drivers have to resort to code greps for compat detection, anyway. Let's delete these defines, save LoC, help LXR link to the right place. 
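For illustration (hypothetical driver snippet), the enum values remain usable from C code; only preprocessor checks stop working:

	/* still fine: IFF_NO_QUEUE is an enum netdev_priv_flags value */
	if (dev->priv_flags & IFF_NO_QUEUE)
		example_setup_noqueue(dev);

/* no longer possible: enum constants are invisible to cpp */
#ifdef IFF_NO_QUEUE
	/* never compiled after this change */
#endif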
Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20240801163401.378723-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607009150b5f..0ef3eaa23f4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1689,38 +1689,6 @@ enum netdev_priv_flags { IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; -#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -#define IFF_EBRIDGE IFF_EBRIDGE -#define IFF_BONDING IFF_BONDING -#define IFF_ISATAP IFF_ISATAP -#define IFF_WAN_HDLC IFF_WAN_HDLC -#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE -#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE -#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL -#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT -#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT -#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH -#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING -#define IFF_UNICAST_FLT IFF_UNICAST_FLT -#define IFF_TEAM_PORT IFF_TEAM_PORT -#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS -#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE -#define IFF_MACVLAN IFF_MACVLAN -#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER -#define IFF_NO_QUEUE IFF_NO_QUEUE -#define IFF_OPENVSWITCH IFF_OPENVSWITCH -#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE -#define IFF_TEAM IFF_TEAM -#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED -#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM -#define IFF_MACSEC IFF_MACSEC -#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER -#define IFF_FAILOVER IFF_FAILOVER -#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE -#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR - /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, -- cgit v1.2.3 From 7e1d512dab5042aa1c2224ed362be79e3f22a15e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 1 Aug 2024 21:00:03 +0100 Subject: linkmode: Change return type of linkmode_andnot to bool linkmode_andnot() simply returns the result of bitmap_andnot(). And the return type of bitmap_andnot() is bool. So it makes sense for the return type of linkmode_andnot() to also be bool. I checked all call-sites and they either ignore the return value or treat it as a bool. Compile tested only. 
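A typical call site already treats the result as a bool, e.g. (the mask name below is a placeholder):

	__ETHTOOL_DECLARE_LINK_MODE_MASK(unwanted);

	/* true if any advertised mode is not actually supported */
	if (linkmode_andnot(unwanted, phydev->advertising, phydev->supported))
		linkmode_and(phydev->advertising, phydev->advertising,
			     phydev->supported);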
Link: https://lore.kernel.org/netdev/68088998-4486-4930-90a4-96a32f08c490@lunn.ch/ Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240801-linkfield-bowl-v1-1-d58f68967802@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/linkmode.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index d94bfd9ac8cc..3b9de09871f6 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -37,8 +37,9 @@ static inline bool linkmode_empty(const unsigned long *src) return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2) +static inline bool linkmode_andnot(unsigned long *dst, + const unsigned long *src1, + const unsigned long *src2) { return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } -- cgit v1.2.3 From f94074687d05aea10b50c4f4055a19d9d0c3bd27 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 1 Aug 2024 17:23:11 +0300 Subject: net: core: annotate socks of struct sock_reuseport with __counted_by According to '__reuseport_alloc()', annotate flexible array member 'sock' of 'struct sock_reuseport' with '__counted_by()' and use convenient 'struct_size()' to simplify the math used in 'kzalloc()'. Signed-off-by: Dmitry Antipov Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240801142311.42837-1-dmantipov@yandex.ru Signed-off-by: Jakub Kicinski --- include/net/sock_reuseport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 6ec140b0a61b..6e4faf3ee76f 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -26,7 +26,7 @@ struct sock_reuseport { unsigned int bind_inany:1; unsigned int has_conns:1; struct bpf_prog __rcu *prog; /* optional BPF sock selector */ - struct sock *socks[]; /* array of sock pointers */ + struct sock *socks[] __counted_by(max_socks); }; extern int reuseport_alloc(struct sock *sk, bool bind_inany); -- cgit v1.2.3 From 8585632a8600d26d4e6b7246d375a9c3a344f0d2 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 9 Jul 2024 13:14:28 +0200 Subject: iio: backend: remove unused parameter Indio_dev was not being used in iio_backend_extend_chan_spec() so remove it. 
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240709-dev-iio-backend-add-debugfs-v1-1-fb4b8f2373c7@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 8099759d7242..4e81931703ab 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -142,8 +142,7 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); -int iio_backend_extend_chan_spec(struct iio_dev *indio_dev, - struct iio_backend *back, +int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); -- cgit v1.2.3 From b40cafc1143600122719ae335359722538ed06e3 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 11 Jul 2024 14:15:41 -0500 Subject: dt-bindings: iio: adc: add AD4695 and similar ADCs Add device tree bindings for AD4695 and similar ADCs. Reviewed-by: Conor Dooley Signed-off-by: David Lechner Link: https://patch.msgid.link/20240711-iio-adc-ad4695-v4-0-c31621113b57@baylibre.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adi,ad4695.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/dt-bindings/iio/adi,ad4695.h (limited to 'include') diff --git a/include/dt-bindings/iio/adi,ad4695.h b/include/dt-bindings/iio/adi,ad4695.h new file mode 100644 index 000000000000..9fbef542bf67 --- /dev/null +++ b/include/dt-bindings/iio/adi,ad4695.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_ADI_AD4695_H +#define _DT_BINDINGS_ADI_AD4695_H + +#define AD4695_COMMON_MODE_REFGND 0xFF +#define AD4695_COMMON_MODE_COM 0xFE + +#endif /* _DT_BINDINGS_ADI_AD4695_H */ -- cgit v1.2.3 From 2477d7b179d3295c9978420027750f047976bd04 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 26 Jul 2024 15:18:40 -0500 Subject: iio: backend: spelling: continuous -> continuous This fixes the spelling in IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE. Signed-off-by: David Lechner Link: https://patch.msgid.link/20240726-iio-backend-spelling-continuous-v1-1-467c6e3f78ff@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 4e81931703ab..29c4cf0bd761 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -17,7 +17,7 @@ enum iio_backend_data_type { }; enum iio_backend_data_source { - IIO_BACKEND_INTERNAL_CONTINUOS_WAVE, + IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE, IIO_BACKEND_EXTERNAL, IIO_BACKEND_DATA_SOURCE_MAX }; -- cgit v1.2.3 From f44e94afbb340be90100f8a172ebb14a9e717c8b Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 26 Jul 2024 10:23:15 +0200 Subject: iio: core: annotate masklength as __private Now that all users are using the proper accessors, we can mark masklength as __private so that no one tries to write. We also get help from checkers in warning us in case someone does it. To access the private field from IIO core code, we need to use the ACCESS_PRIVATE() macro. 
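In practice drivers keep reading the field through the accessor, while only the IIO core spells out the private access, roughly (the channel handler and new value below are hypothetical):

	unsigned int bit;

	/* drivers read the field only through the accessor */
	for_each_set_bit(bit, indio_dev->active_scan_mask,
			 iio_get_masklength(indio_dev))
		example_handle_channel(indio_dev, bit);

	/* only the IIO core writes it, and must spell that out */
	ACCESS_PRIVATE(indio_dev, masklength) = new_masklength;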
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240726-dev-iio-masklength-private3-v1-23-82913fc0fb87@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index dd6bbc468283..f6c0499853bb 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -609,7 +609,7 @@ struct iio_dev { int scan_bytes; const unsigned long *available_scan_masks; - unsigned masklength; + unsigned __private masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; @@ -861,7 +861,7 @@ static inline const struct iio_scan_type */ static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) { - return indio_dev->masklength; + return ACCESS_PRIVATE(indio_dev, masklength); } /** -- cgit v1.2.3 From d092b6869817208326005df0a656e3bb9724d89f Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 31 Jul 2024 09:05:43 +0200 Subject: iio: core: add function to retrieve active_scan_mask index Add a function to retrieve the index of the active scan mask inside the available scan masks array. As in iio_scan_mask_match and iio_sanity_check_avail_scan_masks, this function does not handle multi-long masks correctly. It only checks the first long to be zero, and will use such a mask as a terminator even if there were bits set after the first long. This should be fine since the available_scan_mask has already been sanity tested using iio_sanity_check_avail_scan_masks. See iio_scan_mask_match and iio_sanity_check_avail_scan_masks for more details. Signed-off-by: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20240731-ad7380-add-single-ended-chips-v2-2-cd63bf05744c@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f6c0499853bb..3a735a9a9eae 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -864,6 +864,8 @@ static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) { return ACCESS_PRIVATE(indio_dev, masklength); } +int iio_active_scan_mask_index(struct iio_dev *indio_dev); + /** * iio_for_each_active_channel - Iterated over active channels * @indio_dev: the IIO device -- cgit v1.2.3 From 2256f37e24b1979f9a97de0b76cabbf8544a8aff Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:26:59 +0200 Subject: iio: backend: introduce struct iio_backend_info Instead of only passing the backend ops when calling devm_iio_backend_register(), pass an info-like structure that contains the ops and additional information. For now, the backend name is being added as that will be used by the debugFS interface introduced in a later patch. It also opens the door for further customizations passed by backends. All users of devm_iio_backend_register() were updated accordingly.
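A backend driver then registers along these lines (driver names are made up; only the new calling convention is the point):

	static const struct iio_backend_ops example_ops = {
		.enable = example_enable,
		.chan_enable = example_chan_enable,
	};

	static const struct iio_backend_info example_info = {
		.name = "example-backend",
		.ops = &example_ops,
	};

	/* previously: devm_iio_backend_register(dev, &example_ops, st); */
	ret = devm_iio_backend_register(dev, &example_info, st);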
Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-1-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 29c4cf0bd761..f120fa2e0a43 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -115,6 +115,16 @@ struct iio_backend_ops { const struct iio_chan_spec *chan, char *buf); }; +/** + * struct iio_backend_info - info structure for an iio_backend + * @name: Backend name. + * @ops: Backend operations. + */ +struct iio_backend_info { + const char *name; + const struct iio_backend_ops *ops; +}; + int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); @@ -151,6 +161,6 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, struct fwnode_handle *fwnode); int devm_iio_backend_register(struct device *dev, - const struct iio_backend_ops *ops, void *priv); + const struct iio_backend_info *info, void *priv); #endif -- cgit v1.2.3 From cdf01e0809a4c6c7877ea52401c2a6679df7aed6 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:27:00 +0200 Subject: iio: backend: add debugFs interface This adds a basic debugfs interface for backends. Two new ops are being added: * debugfs_reg_access: Analogous to the core IIO one but for backend devices. * debugfs_print_chan_status: One useful usecase for this one is for testing test tones in a digital interface and "ask" the backend to dump more details on why a test tone might have errors. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-2-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index f120fa2e0a43..9d0dba7ab9e7 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -22,6 +22,8 @@ enum iio_backend_data_source { IIO_BACKEND_DATA_SOURCE_MAX }; +#define iio_backend_debugfs_ptr(ptr) PTR_IF(IS_ENABLED(CONFIG_DEBUG_FS), ptr) + /** * IIO_BACKEND_EX_INFO - Helper for an IIO extended channel attribute * @_name: Attribute name @@ -81,6 +83,8 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @debugfs_print_chan_status: Print channel status into a buffer. + * @debugfs_reg_access: Read or write register value of backend. 
**/ struct iio_backend_ops { int (*enable)(struct iio_backend *back); @@ -113,6 +117,11 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*debugfs_print_chan_status)(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); + int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg, + unsigned int writeval, unsigned int *readval); }; /** @@ -163,4 +172,9 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, int devm_iio_backend_register(struct device *dev, const struct iio_backend_info *info, void *priv); +ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); +void iio_backend_debugfs_add(struct iio_backend *back, + struct iio_dev *indio_dev); #endif -- cgit v1.2.3 From b001635a7c3014fe7745a507840c5d8f58235c04 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 2 Aug 2024 16:27:01 +0200 Subject: iio: backend: add a modified prbs23 support Support ADI specific prb23 sequence that can be used both for calibrating or debugging digital interfaces. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240802-dev-iio-backend-add-debugfs-v2-3-4cb62852f0d0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 9d0dba7ab9e7..2b9d1aa86552 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -56,6 +56,8 @@ enum iio_backend_test_pattern { IIO_BACKEND_NO_TEST_PATTERN, /* modified prbs9 */ IIO_BACKEND_ADI_PRBS_9A = 32, + /* modified prbs23 */ + IIO_BACKEND_ADI_PRBS_23A, IIO_BACKEND_TEST_PATTERN_MAX }; -- cgit v1.2.3 From b6547e54864b998af21fdcaa0a88cf8e7efe641a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Aug 2024 18:12:06 -0700 Subject: runtime constants: deal with old decrepit linkers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime constants linker script depended on documented linker behavior [1]: "If an output section’s name is the same as the input section’s name and is representable as a C identifier, then the linker will automatically PROVIDE two symbols: __start_SECNAME and __stop_SECNAME, where SECNAME is the name of the section. These indicate the start address and end address of the output section respectively" to just automatically define the symbol names for the bounds of the runtime constant arrays. It turns out that this isn't actually something we can rely on, with old linkers not generating these automatic symbols. It looks to have been introduced in binutils-2.29 back in 2017, and we still support building with versions all the way back to binutils-2.25 (from 2015). And yes, Oleg actually seems to be using such ancient versions of binutils. So instead of depending on the implicit symbols from "section names match and are representable C identifiers", just do this all manually. It's not like it causes us any extra pain, we already have to do that for all the other sections that we use that often have special characters in them. 
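For context, the symbols in question are the per-section bounds that C code walks at boot; a generic sketch (section and helper names are illustrative, not the actual runtime-constant ones):

	/* provided implicitly by binutils >= 2.29, or explicitly by the
	 * linker script after this change */
	extern unsigned long __start_runtime_example[];
	extern unsigned long __stop_runtime_example[];

	static void example_patch_all(void)
	{
		unsigned long *site;

		for (site = __start_runtime_example;
		     site < __stop_runtime_example; site++)
			example_patch_site(*site);
	}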
Reported-and-tested-by: Oleg Nesterov Link: https://sourceware.org/binutils/docs/ld/Input-Section-Example.html [1] Link: https://lore.kernel.org/all/20240802114518.GA20924@redhat.com/ Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c4918..1ae44793132a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ -- cgit v1.2.3 From 768e4bb6a75e3c6a034df7c67edac20bd222857e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 31 Jul 2024 13:07:17 -0700 Subject: net: Don't register pernet_operations if only one of id or size is specified. We can allocate per-netns memory for struct pernet_operations by specifying id and size. register_pernet_operations() assigns an id to pernet_operations and later ops_init() allocates the specified size of memory as net->gen->ptr[id]. If id is missing, no memory is allocated. If size is not specified, pernet_operations just wastes an entry of net->gen->ptr[] for every netns. net_generic is available only when both id and size are specified, so let's ensure that. While we are at it, we add const to both fields. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 20c34bd7a077..e67b483cc8bb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -451,8 +451,8 @@ struct pernet_operations { /* Following method is called with RTNL held. */ void (*exit_batch_rtnl)(struct list_head *net_exit_list, struct list_head *dev_kill_list); - unsigned int *id; - size_t size; + unsigned int * const id; + const size_t size; }; /* -- cgit v1.2.3 From d21fe1e9a6a44e752e227d3a43f41aed790dad95 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 18 Jul 2024 10:23:54 +0800 Subject: pinctrl: pinconf-generic: Add support for "input-schmitt-microvolt" property Add "input-schmitt-microvolt" property to generic options used for DT parsing files. This enables drivers, which use generic pin configurations, to get the value passed to this property. 
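A generic-pinconf driver would then handle the new parameter roughly as follows (the chip-specific setter is hypothetical):

	static int example_pin_config_set(struct pinctrl_dev *pctldev, unsigned int pin,
					  unsigned long *configs, unsigned int num_configs)
	{
		unsigned int i;

		for (i = 0; i < num_configs; i++) {
			switch (pinconf_to_config_param(configs[i])) {
			case PIN_CONFIG_INPUT_SCHMITT_UV:
				/* argument is the threshold in microvolts */
				example_set_schmitt_uv(pctldev, pin,
						       pinconf_to_config_argument(configs[i]));
				break;
			default:
				return -ENOTSUPP;
			}
		}

		return 0;
	}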
Signed-off-by: Inochi Amaoto Link: https://lore.kernel.org/IA1PR20MB4953806785BA04E075DC4F03BBAC2@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a65d3d078e58..53cfde98433d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,6 +81,8 @@ struct pinctrl_map; * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, * schmitt-trigger mode is disabled. + * @PIN_CONFIG_INPUT_SCHMITT_UV: this will configure an input pin to run in + * schmitt-trigger mode. The argument is in uV. * @PIN_CONFIG_MODE_LOW_POWER: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 @@ -132,6 +134,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, + PIN_CONFIG_INPUT_SCHMITT_UV, PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT, -- cgit v1.2.3 From f17c06c6608ad4ecd2ccf321753fb511812d821b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 2 Aug 2024 16:22:14 +0100 Subject: i2c: Fix conditional for substituting empty ACPI functions Add IS_ENABLED(CONFIG_I2C) to the conditional around a bunch of ACPI functions. The conditional around these functions depended only on CONFIG_ACPI. But the functions are implemented in I2C core, so are only present if CONFIG_I2C is enabled. Signed-off-by: Richard Fitzgerald Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256..7eedd0c662da 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); -- cgit v1.2.3 From b88f55389ad27f05ed84af9e1026aa64dbfabc9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Aug 2024 18:48:10 +0900 Subject: profiling: remove profile=sleep support The kernel sleep profile is no longer working due to a recursive locking bug introduced by commit 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") Booting with the 'profile=sleep' kernel command line option added or executing # echo -n sleep > /sys/kernel/profiling after boot causes the system to lock up. 
Lockdep reports kthreadd/3 is trying to acquire lock: ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: get_wchan+0x32/0x70 but task is already holding lock: ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: try_to_wake_up+0x53/0x370 with the call trace being lock_acquire+0xc8/0x2f0 get_wchan+0x32/0x70 __update_stats_enqueue_sleeper+0x151/0x430 enqueue_entity+0x4b0/0x520 enqueue_task_fair+0x92/0x6b0 ttwu_do_activate+0x73/0x140 try_to_wake_up+0x213/0x370 swake_up_locked+0x20/0x50 complete+0x2f/0x40 kthread+0xfb/0x180 However, since nobody noticed this regression for more than two years, let's remove 'profile=sleep' support based on the assumption that nobody needs this functionality. Fixes: 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/profile.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12..3f53cdb0c27c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; -- cgit v1.2.3 From 0e8b53979ac86eddb3fd76264025a70071a25574 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 5 Aug 2024 14:01:21 +0900 Subject: bpf: kprobe: remove unused declaring of bpf_kprobe_override After the commit 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one"), "bpf_kprobe_override" is not used anywhere anymore, and we can remove it now. Link: https://lore.kernel.org/all/20240710085939.11520-1-dongml2@chinatelecom.cn/ Fixes: 66665ad2f102 ("tracing/kprobe: bpf: Compare instruction pointer with original one") Signed-off-by: Menglong Dong Acked-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..9435185c10ef 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From 613f21505b25a4f43f33de00f11afc059bedde2b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 4 Jul 2024 11:01:51 +0200 Subject: media: cec: core: add new CEC_MSG_FL_REPLY_VENDOR_ID flag If this flag is set, then the reply is expected to consist of the CEC_MSG_VENDOR_COMMAND_WITH_ID opcode followed by the Vendor ID (as used in bytes 1-4 of the message), followed by the struct cec_msg reply field. Note that this assumes that the byte after the Vendor ID is a vendor-specific opcode. This flag makes it easier to wait for replies to vendor commands, using the same CEC framework support for waiting for regular replies. Support for this flag is indicated by setting the new CEC_CAP_REPLY_VENDOR_ID capability. 
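From userspace, the flag would be used roughly like this (sketch only; the vendor-specific opcode, timeout and payload handling are made up):

	struct cec_msg msg = {};

	cec_msg_init(&msg, CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_TV);
	/* append the CEC_MSG_VENDOR_COMMAND_WITH_ID payload here ... */
	msg.flags = CEC_MSG_FL_REPLY_VENDOR_ID;
	msg.reply = 0x42;	/* vendor-specific opcode expected after the Vendor ID */
	msg.timeout = 1000;	/* ms */

	if (ioctl(fd, CEC_TRANSMIT, &msg) == 0 &&
	    (msg.rx_status & CEC_RX_STATUS_OK))
		/* msg now holds the matching vendor reply */
		example_handle_reply(&msg);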
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 2 ++ include/uapi/linux/cec.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index d131514032f2..07d2ee8a3904 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -66,6 +66,8 @@ struct cec_data { struct list_head xfer_list; struct cec_adapter *adap; struct cec_msg msg; + u8 match_len; + u8 match_reply[5]; struct cec_fh *fh; struct delayed_work work; struct completion c; diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index b8e071abaea5..894fffc66f2c 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -165,6 +165,7 @@ static inline int cec_msg_recv_is_rx_result(const struct cec_msg *msg) /* cec_msg flags field */ #define CEC_MSG_FL_REPLY_TO_FOLLOWERS (1 << 0) #define CEC_MSG_FL_RAW (1 << 1) +#define CEC_MSG_FL_REPLY_VENDOR_ID (1 << 2) /* cec_msg tx/rx_status field */ #define CEC_TX_STATUS_OK (1 << 0) @@ -339,6 +340,8 @@ static inline int cec_is_unconfigured(__u16 log_addr_mask) #define CEC_CAP_MONITOR_PIN (1 << 7) /* CEC_ADAP_G_CONNECTOR_INFO is available */ #define CEC_CAP_CONNECTOR_INFO (1 << 8) +/* CEC_MSG_FL_REPLY_VENDOR_ID is available */ +#define CEC_CAP_REPLY_VENDOR_ID (1 << 9) /** * struct cec_caps - CEC capabilities structure. -- cgit v1.2.3 From d7bdb8e6aabe218fd980768a1486434e42761539 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 27 May 2024 16:25:51 +0200 Subject: pmdomain: core: Enable s2idle for CPU PM domains on PREEMPT_RT To allow a genpd provider for a CPU PM domain to enter a domain-idle-state during s2idle on a PREEMPT_RT based configuration, we can't use the regular spinlock, as they are turned into sleepable locks on PREEMPT_RT. To address this problem, let's convert into using the raw spinlock, but only for genpd providers that have the GENPD_FLAG_CPU_DOMAIN bit set. In this way, the lock can still be acquired/released in atomic context, which is needed in the idle-path for PREEMPT_RT. Do note that the genpd power-on/off notifiers may also be fired during s2idle, but these are already prepared for PREEMPT_RT as they are based on the raw notifiers. However, consumers of them may need to adopt accordingly to work properly on PREEMPT_RT. Signed-off-by: Ulf Hansson Tested-by: Raghavendra Kakarla # qcm6490 with PREEMPT_RT set Acked-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240527142557.321610-2-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 858c8e7851fb..b86bb52858ac 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -198,8 +198,11 @@ struct generic_pm_domain { spinlock_t slock; unsigned long lock_flags; }; + struct { + raw_spinlock_t raw_slock; + unsigned long raw_lock_flags; + }; }; - }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.2.3 From d5934e76316e84eced836b6b2bafae1837d1cd58 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Mar 2024 16:48:48 -0700 Subject: cleanup: Add usage and style documentation When proposing that PCI grow some new cleanup helpers for pci_dev_put() and pci_dev_{lock,unlock} [1], Bjorn had some fundamental questions about expectations and best practices. Upon reviewing an updated changelog with those details he recommended adding them to documentation in the header file itself. 
Add that documentation and link it into the rendering for Documentation/core-api/. Signed-off-by: Dan Williams Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jonathan Cameron Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/171175585714.2192972.12661675876300167762.stgit@dwillia2-xfh.jf.intel.com --- include/linux/cleanup.h | 136 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..9c6b4f2c0176 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -4,6 +4,142 @@ #include +/** + * DOC: scope-based cleanup helpers + * + * The "goto error" pattern is notorious for introducing subtle resource + * leaks. It is tedious and error prone to add new resource acquisition + * constraints into code paths that already have several unwind + * conditions. The "cleanup" helpers enable the compiler to help with + * this tedium and can aid in maintaining LIFO (last in first out) + * unwind ordering to avoid unintentional leaks. + * + * As drivers make up the majority of the kernel code base, here is an + * example of using these helpers to clean up PCI drivers. The target of + * the cleanups are occasions where a goto is used to unwind a device + * reference (pci_dev_put()), or unlock the device (pci_dev_unlock()) + * before returning. + * + * The DEFINE_FREE() macro can arrange for PCI device references to be + * dropped when the associated variable goes out of scope:: + * + * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) + * ... + * struct pci_dev *dev __free(pci_dev_put) = + * pci_get_slot(parent, PCI_DEVFN(0, 0)); + * + * The above will automatically call pci_dev_put() if @dev is non-NULL + * when @dev goes out of scope (automatic variable scope). If a function + * wants to invoke pci_dev_put() on error, but return @dev (i.e. without + * freeing it) on success, it can do:: + * + * return no_free_ptr(dev); + * + * ...or:: + * + * return_ptr(dev); + * + * The DEFINE_GUARD() macro can arrange for the PCI device lock to be + * dropped when the scope where guard() is invoked ends:: + * + * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) + * ... + * guard(pci_dev)(dev); + * + * The lifetime of the lock obtained by the guard() helper follows the + * scope of automatic variable declaration. Take the following example:: + * + * func(...) + * { + * if (...) { + * ... + * guard(pci_dev)(dev); // pci_dev_lock() invoked here + * ... + * } // <- implied pci_dev_unlock() triggered here + * } + * + * Observe the lock is held for the remainder of the "if ()" block not + * the remainder of "func()". + * + * Now, when a function uses both __free() and guard(), or multiple + * instances of __free(), the LIFO order of variable definition order + * matters. GCC documentation says: + * + * "When multiple variables in the same scope have cleanup attributes, + * at exit from the scope their associated cleanup functions are run in + * reverse order of definition (last defined, first cleanup)." + * + * When the unwind order matters it requires that variables be defined + * mid-function scope rather than at the top of the file. 
Take the + * following example and notice the bug highlighted by "!!":: + * + * LIST_HEAD(list); + * DEFINE_MUTEX(lock); + * + * struct object { + * struct list_head node; + * }; + * + * static struct object *alloc_add(void) + * { + * struct object *obj; + * + * lockdep_assert_held(&lock); + * obj = kzalloc(sizeof(*obj), GFP_KERNEL); + * if (obj) { + * LIST_HEAD_INIT(&obj->node); + * list_add(obj->node, &list): + * } + * return obj; + * } + * + * static void remove_free(struct object *obj) + * { + * lockdep_assert_held(&lock); + * list_del(&obj->node); + * kfree(obj); + * } + * + * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T)) + * static int init(void) + * { + * struct object *obj __free(remove_free) = NULL; + * int err; + * + * guard(mutex)(&lock); + * obj = alloc_add(); + * + * if (!obj) + * return -ENOMEM; + * + * err = other_init(obj); + * if (err) + * return err; // remove_free() called without the lock!! + * + * no_free_ptr(obj); + * return 0; + * } + * + * That bug is fixed by changing init() to call guard() and define + + * initialize @obj in this order:: + * + * guard(mutex)(&lock); + * struct object *obj __free(remove_free) = alloc_add(); + * + * Given that the "__free(...) = NULL" pattern for variables defined at + * the top of the function poses this potential interdependency problem + * the recommendation is to always define and assign variables in one + * statement and not group variable definitions at the top of the + * function when __free() is used. + * + * Lastly, given that the benefit of cleanup helpers is removal of + * "goto", and that the "goto" statement can jump between scopes, the + * expectation is that usage of "goto" and cleanup helpers is never + * mixed in the same function. I.e. for a given routine, convert all + * resources that need a "goto" cleanup to scope-based cleanup, or + * convert none of them. + */ + /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() -- cgit v1.2.3 From a720dee5e039238a44c0142dfccdc0e35c1125f7 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Sat, 13 Jul 2024 19:47:33 +1200 Subject: hid-asus: use hid for brightness control on keyboard On almost all ASUS ROG series laptops the MCU used for the USB keyboard also has a HID packet used for setting the brightness. This is usually the same as the WMI method. But in some laptops the WMI method either is missing or doesn't work, so we should default to the HID control. Signed-off-by: Luke D. 
Jones Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240713074733.77334-2-luke@ljones.dev Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 5e6f407b2def..b601b245a035 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -4,6 +4,7 @@ #include #include +#include /* WMI Methods */ #define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ @@ -165,4 +166,39 @@ static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, } #endif +/* To be used by both hid-asus and asus-wmi to determine which controls kbd_brightness */ +static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Zephyrus"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Strix"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GU605M"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { }, +}; + #endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */ -- cgit v1.2.3 From e42066df07c0fcedebb32ed56f8bc39b4bf86337 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 5 Aug 2024 15:08:39 +0100 Subject: ASoC: cs35l56: Handle OTP read latency over SoundWire Use the late-read buffer in the CS35L56 SoundWire interface to read OTP memory. The OTP memory has a longer access latency than chip registers and cannot guarantee to return the data value in the SoundWire control response if the bus clock is >4.8 MHz. The Cirrus SoundWire peripheral IP exposes the bridge-to-bus read buffer and status bits. For a read from OTP the bridge status bits are polled to wait for the OTP data to be loaded into the read buffer and the data is then read from there. Signed-off-by: Richard Fitzgerald Fixes: e1830f66f6c6 ("ASoC: cs35l56: Add helper functions for amp calibration") Link: https://patch.msgid.link/20240805140839.26042-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e5741..a51acefa785f 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; -- cgit v1.2.3 From 25162a4f64f8ba0065f300977589fe1f6af332f0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 17:16:25 -0700 Subject: Input: cyttsp4 - remove driver The cyttsp4 touchscreen driver was contributed in 2013 and since then has seen no updates. The driver uses platform data (no device tree support) and there are no users of it in the mainline kernel. There were occasional fixes to it for issues either found by static code analysis tools or via visual inspection, but otherwise the driver is completely untested. Remove the driver. 
Reviewed-by: Linus Walleij Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/ZrAZ2cUow_z838tp@google.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/cyttsp4.h | 62 ----------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 include/linux/platform_data/cyttsp4.h (limited to 'include') diff --git a/include/linux/platform_data/cyttsp4.h b/include/linux/platform_data/cyttsp4.h deleted file mode 100644 index 5dc9d2be384b..000000000000 --- a/include/linux/platform_data/cyttsp4.h +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Header file for: - * Cypress TrueTouch(TM) Standard Product (TTSP) touchscreen drivers. - * For use with Cypress Txx3xx parts. - * Supported parts include: - * CY8CTST341 - * CY8CTMA340 - * - * Copyright (C) 2009, 2010, 2011 Cypress Semiconductor, Inc. - * Copyright (C) 2012 Javier Martinez Canillas - * - * Contact Cypress Semiconductor at www.cypress.com (kev@cypress.com) - */ -#ifndef _CYTTSP4_H_ -#define _CYTTSP4_H_ - -#define CYTTSP4_MT_NAME "cyttsp4_mt" -#define CYTTSP4_I2C_NAME "cyttsp4_i2c_adapter" -#define CYTTSP4_SPI_NAME "cyttsp4_spi_adapter" - -#define CY_TOUCH_SETTINGS_MAX 32 - -struct touch_framework { - const uint16_t *abs; - uint8_t size; - uint8_t enable_vkeys; -} __packed; - -struct cyttsp4_mt_platform_data { - struct touch_framework *frmwrk; - unsigned short flags; - char const *inp_dev_name; -}; - -struct touch_settings { - const uint8_t *data; - uint32_t size; - uint8_t tag; -} __packed; - -struct cyttsp4_core_platform_data { - int irq_gpio; - int rst_gpio; - int level_irq_udelay; - int (*xres)(struct cyttsp4_core_platform_data *pdata, - struct device *dev); - int (*init)(struct cyttsp4_core_platform_data *pdata, - int on, struct device *dev); - int (*power)(struct cyttsp4_core_platform_data *pdata, - int on, struct device *dev, atomic_t *ignore_irq); - int (*irq_stat)(struct cyttsp4_core_platform_data *pdata, - struct device *dev); - struct touch_settings *sett[CY_TOUCH_SETTINGS_MAX]; -}; - -struct cyttsp4_platform_data { - struct cyttsp4_core_platform_data *core_pdata; - struct cyttsp4_mt_platform_data *mt_pdata; -}; - -#endif /* _CYTTSP4_H_ */ -- cgit v1.2.3 From f91f7ac900e7342e0fd66093dfbf7cb8cb585a99 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 17 Jul 2024 15:00:23 +0200 Subject: refcount: Report UAF for refcount_sub_and_test(0) when counter==0 When a reference counter is at zero and refcount_sub_and_test() is invoked to subtract zero, the function accepts this request without any warning and returns true. This behavior does not seem ideal because the counter being already at zero indicates a use-after-free. Furthermore, returning true by refcount_sub_and_test() in this case potentially results in a double-free done by its caller. Modify the underlying function __refcount_sub_and_test() to warn about this case as a use-after-free and have it return false to avoid the potential double-free. 
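For illustration, a minimal sketch of the behavioural change (hypothetical sequence, not part of the patch):

    refcount_t r = REFCOUNT_INIT(1);

    refcount_sub_and_test(1, &r);   /* old == 1, hits zero, returns true:
                                     * the owner legitimately frees        */
    refcount_sub_and_test(0, &r);   /* old == 0: used to return true and
                                     * invite a double free; now it warns
                                     * REFCOUNT_SUB_UAF and returns false  */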
Signed-off-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240717130023.5675-1-petr.pavlu@suse.com Signed-off-by: Kees Cook --- include/linux/refcount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d..35f039ecb272 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; -- cgit v1.2.3 From a2dc7bee4f77266ebb8e3a8544fc5f7812835f8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:25 +0000 Subject: inet: constify inet_sk_bound_dev_eq() net parameter inet_sk_bound_dev_eq() and its callers do not modify the net structure. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 2 +- include/net/inet_sock.h | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 533a7337865a..591cbf5e4d5f 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -175,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, int inet6_hash(struct sock *sk); -static inline bool inet6_match(struct net *net, const struct sock *sk, +static inline bool inet6_match(const struct net *net, const struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, const __portpair ports, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7f1b38458743..1cc8b7ca20a1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -351,7 +351,7 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -static inline bool inet_match(struct net *net, const struct sock *sk, +static inline bool inet_match(const struct net *net, const struct sock *sk, const __addrpair cookie, const __portpair ports, int dif, int sdif) { diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f9ddd47dc4f8..394c3b66065e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -150,7 +150,8 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, return bound_dev_if == dif || bound_dev_if == sdif; } -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, +static inline bool inet_sk_bound_dev_eq(const struct net *net, + int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -- cgit v1.2.3 From d4433e8b405a882fa2ef29601c4ad262ba6e5526 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:26 +0000 Subject: inet: constify 'struct net' parameter of various lookup helpers Following helpers do not touch their struct net argument: - bpf_sk_lookup_run_v4() - inet_lookup_reuseport() - inet_lhash2_lookup() - inet_lookup_run_sk_lookup() - __inet_lookup_listener() - __inet_lookup_established() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: 
https://patch.msgid.link/20240802134029.3748005-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 2 +- include/net/inet_hashtables.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6672ff61407..4acd1da4dac6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1616,7 +1616,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; _all_pass || _selected_sk ? SK_PASS : SK_DROP; \ }) -static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v4(const struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, const int ifindex, struct sock **psk) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1cc8b7ca20a1..5eea47f135a4 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -304,7 +304,7 @@ int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -struct sock *__inet_lookup_listener(struct net *net, +struct sock *__inet_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, @@ -368,7 +368,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM */ -struct sock *__inet_lookup_established(struct net *net, +struct sock *__inet_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, @@ -382,13 +382,13 @@ inet_ehashfn_t inet_ehashfn; INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); -struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, +struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, inet_ehashfn_t *ehashfn); -struct sock *inet_lookup_run_sk_lookup(struct net *net, +struct sock *inet_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, -- cgit v1.2.3 From 9a2fa1472083580b6c66bdaf291f591e1170123a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Aug 2024 18:02:00 -0400 Subject: fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE copy_fd_bitmaps(new, old, count) is expected to copy the first count/BITS_PER_LONG bits from old->full_fds_bits[] and fill the rest with zeroes. What it does is copying enough words (BITS_TO_LONGS(count/BITS_PER_LONG)), then memsets the rest. That works fine, *if* all bits past the cutoff point are clear. Otherwise we are risking garbage from the last word we'd copied. For most of the callers that is true - expand_fdtable() has count equal to old->max_fds, so there's no open descriptors past count, let alone fully occupied words in ->open_fds[], which is what bits in ->full_fds_bits[] correspond to. The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds), which is the smallest multiple of BITS_PER_LONG that covers all opened descriptors below max_fds. In the common case (copying on fork()) max_fds is ~0U, so all opened descriptors will be below it and we are fine, by the same reasons why the call in expand_fdtable() is safe. 
Unfortunately, there is a case where max_fds is less than that and where we might, indeed, end up with junk in ->full_fds_bits[] - close_range(from, to, CLOSE_RANGE_UNSHARE) with * descriptor table being currently shared * 'to' being above the current capacity of descriptor table * 'from' being just under some chunk of opened descriptors. In that case we end up with observably wrong behaviour - e.g. spawn a child with CLONE_FILES, get all descriptors in range 0..127 open, then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending up with descriptor #128, despite #64 being observably not open. The minimally invasive fix would be to deal with that in dup_fd(). If this proves to add measurable overhead, we can go that way, but let's try to fix copy_fd_bitmaps() first. * new helper: bitmap_copy_and_expand(to, from, bits_to_copy, size). * make copy_fd_bitmaps() take the bitmap size in words, rather than bits; it's 'count' argument is always a multiple of BITS_PER_LONG, so we are not losing any information, and that way we can use the same helper for all three bitmaps - compiler will see that count is a multiple of BITS_PER_LONG for the large ones, so it'll generate plain memcpy()+memset(). Reproducer added to tools/testing/selftests/core/close_range_test.c Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/bitmap.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..d3b66d77df7a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. -- cgit v1.2.3 From b9abcbb1239cd782f76532397a7c45458de9a73e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:27 +0000 Subject: udp: constify 'struct net' parameter of socket lookups Following helpers do not touch their 'struct net' argument. 
- udp_sk_bound_dev_eq() - udp4_lib_lookup() - __udp4_lib_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index c4e05b14b648..a0217e3cfe4f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -79,7 +79,8 @@ struct udp_table { extern struct udp_table udp_table; void udp_table_init(struct udp_table *, const char *); static inline struct udp_hslot *udp_hashslot(struct udp_table *table, - struct net *net, unsigned int num) + const struct net *net, + unsigned int num) { return &table->hash[udp_hashfn(net, num, table->mask)]; } @@ -245,7 +246,7 @@ static inline int udp_rqueue_get(struct sock *sk) return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } -static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, +static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) @@ -296,9 +297,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); -struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, +struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, +struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, -- cgit v1.2.3 From 10b2a44ccb0cdf85480b6f73a23530aa3ac722de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:28 +0000 Subject: inet6: constify 'struct net' parameter of various lookup helpers Following helpers do not touch their struct net argument: - bpf_sk_lookup_run_v6() - __inet6_lookup_established() - inet6_lookup_reuseport() - inet6_lookup_listener() - inet6_lookup_run_sk_lookup() - __inet6_lookup() - inet6_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 2 +- include/net/inet6_hashtables.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4acd1da4dac6..64e1506fefb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1653,7 +1653,7 @@ static inline bool bpf_sk_lookup_run_v4(const struct net *net, int protocol, } #if IS_ENABLED(CONFIG_IPV6) -static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v6(const struct net *net, int protocol, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 591cbf5e4d5f..74dd90ff5f12 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -40,7 +40,7 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash, * * The sockhash lock must be held as a reader here. 
*/ -struct sock *__inet6_lookup_established(struct net *net, +struct sock *__inet6_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, @@ -56,7 +56,7 @@ inet6_ehashfn_t inet6_ehashfn; INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); -struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, +struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, @@ -64,7 +64,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, unsigned short hnum, inet6_ehashfn_t *ehashfn); -struct sock *inet6_lookup_listener(struct net *net, +struct sock *inet6_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -73,7 +73,7 @@ struct sock *inet6_lookup_listener(struct net *net, const unsigned short hnum, const int dif, const int sdif); -struct sock *inet6_lookup_run_sk_lookup(struct net *net, +struct sock *inet6_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -82,7 +82,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn); -static inline struct sock *__inet6_lookup(struct net *net, +static inline struct sock *__inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, @@ -167,7 +167,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, iif, sdif, refcounted); } -struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, +struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, -- cgit v1.2.3 From 87d973e8ddeeddf1777e6689df86d5d369cbcbb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Aug 2024 13:40:29 +0000 Subject: ipv6: udp: constify 'struct net' parameter of socket lookups Following helpers do not touch their 'struct net' argument. 
- udp6_lib_lookup() - __udp6_lib_lookup() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240802134029.3748005-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ipv6_stubs.h | 2 +- include/net/udp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 11cefd50704d..8a3465c8c2c5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -82,7 +82,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags); - struct sock *(*udp6_lib_lookup)(struct net *net, + struct sock *(*udp6_lib_lookup)(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, diff --git a/include/net/udp.h b/include/net/udp.h index a0217e3cfe4f..5ca53b1cec67 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,11 +305,11 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -struct sock *udp6_lib_lookup(struct net *net, +struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif); -struct sock *__udp6_lib_lookup(struct net *net, +struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, -- cgit v1.2.3 From 7e45c1e9edc0004b914e2d5a33245908a06f072c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 30 Jul 2024 16:40:52 +0300 Subject: net/mlx5: Add support for MTPTM and MTCTR registers Make Management Precision Time Measurement (MTPTM) register and Management Cross Timestamp (MTCTR) register usable in mlx5 driver. 
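For illustration, a hedged sketch of how a caller might issue a cross-timestamp read through the new MTCTR register (the call site below is assumed rather than taken from this series; it relies on the mtctr_reg layout added in the hunks that follow):

    /* mdev: an assumed struct mlx5_core_dev * */
    u32 in[MLX5_ST_SZ_DW(mtctr_reg)] = {};
    u32 out[MLX5_ST_SZ_DW(mtctr_reg)] = {};
    u64 host_ts, device_ts;
    int err;

    MLX5_SET(mtctr_reg, in, first_clock_timestamp_request,
             MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK);
    MLX5_SET(mtctr_reg, in, second_clock_timestamp_request,
             MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK);

    err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
                               MLX5_REG_MTCTR, 0, 0);
    if (!err && MLX5_GET(mtctr_reg, out, first_clock_valid) &&
        MLX5_GET(mtctr_reg, out, second_clock_valid)) {
            host_ts   = MLX5_GET64(mtctr_reg, out, first_clock_timestamp);
            device_ts = MLX5_GET64(mtctr_reg, out, second_clock_timestamp);
    }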
Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Tested-by: Vadim Fedorenko Link: https://patch.msgid.link/20240730134055.1835261-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 7 ++++++- include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ba875a619b97..a94bc9e3af96 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1243,7 +1243,8 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, MLX5_MCAM_REGS_0x9100_0x917F = 0x2, - MLX5_MCAM_REGS_NUM = 0x3, + MLX5_MCAM_REGS_0x9180_0x91FF = 0x3, + MLX5_MCAM_REGS_NUM = 0x4, }; enum mlx5_mcam_feature_groups { @@ -1392,6 +1393,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ mng_access_reg_cap_mask.access_regs2.reg) +#define MLX5_CAP_MCAM_REG3(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9180_0x91FF], \ + mng_access_reg_cap_mask.access_regs3.reg) + #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a96438ded15f..9f42834f57c5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -159,6 +159,8 @@ enum { MLX5_REG_MSECQ = 0x9155, MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, + MLX5_REG_MTPTM = 0x9180, + MLX5_REG_MTCTR = 0x9181, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, MLX5_REG_DTOR = 0xC00E, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..234ad6f16e92 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10401,6 +10401,18 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 regs_31_to_0[0x20]; }; +struct mlx5_ifc_mcam_access_reg_bits3 { + u8 regs_127_to_96[0x20]; + + u8 regs_95_to_64[0x20]; + + u8 regs_63_to_32[0x20]; + + u8 regs_31_to_2[0x1e]; + u8 mtctr[0x1]; + u8 mtptm[0x1]; +}; + struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -10413,6 +10425,7 @@ struct mlx5_ifc_mcam_reg_bits { struct mlx5_ifc_mcam_access_reg_bits access_regs; struct mlx5_ifc_mcam_access_reg_bits1 access_regs1; struct mlx5_ifc_mcam_access_reg_bits2 access_regs2; + struct mlx5_ifc_mcam_access_reg_bits3 access_regs3; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; @@ -11166,6 +11179,34 @@ struct mlx5_ifc_mtmp_reg_bits { u8 sensor_name_lo[0x20]; }; +struct mlx5_ifc_mtptm_reg_bits { + u8 reserved_at_0[0x10]; + u8 psta[0x1]; + u8 reserved_at_11[0xf]; + + u8 reserved_at_20[0x60]; +}; + +enum { + MLX5_MTCTR_REQUEST_NOP = 0x0, + MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK = 0x1, + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER = 0x2, + MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK = 0x3, +}; + +struct mlx5_ifc_mtctr_reg_bits { + u8 first_clock_timestamp_request[0x8]; + u8 second_clock_timestamp_request[0x8]; + u8 reserved_at_10[0x10]; + + u8 first_clock_valid[0x1]; + u8 second_clock_valid[0x1]; + u8 reserved_at_22[0x1e]; + + u8 first_clock_timestamp[0x40]; + u8 second_clock_timestamp[0x40]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -11230,6 
+11271,8 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mrtc_reg_bits mrtc_reg; struct mlx5_ifc_mtcap_reg_bits mtcap_reg; struct mlx5_ifc_mtmp_reg_bits mtmp_reg; + struct mlx5_ifc_mtptm_reg_bits mtptm_reg; + struct mlx5_ifc_mtctr_reg_bits mtctr_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From c114e9948c2b6a0b400266e59cc656b59e795bca Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 18 Jul 2024 11:27:24 -0700 Subject: coredump: Standartize and fix logging The coredump code does not log the process ID and the comm consistently, logs unescaped comm when it does log it, and does not always use the ratelimited logging. That makes it harder to analyze logs and puts the system at the risk of spamming the system log incase something crashes many times over and over again. Fix that by logging TGID and comm (escaped) consistently and using the ratelimited logging always. Signed-off-by: Roman Kisel Tested-by: Allen Pais Link: https://lore.kernel.org/r/20240718182743.1959160-2-romank@linux.microsoft.com Signed-off-by: Kees Cook --- include/linux/coredump.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 0904ba010341..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -43,8 +43,30 @@ extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); extern void do_coredump(const kernel_siginfo_t *siginfo); + +/* + * Logging for the coredump code, ratelimited. + * The TGID and comm fields are added to the message. + */ + +#define __COREDUMP_PRINTK(Level, Format, ...) \ + do { \ + char comm[TASK_COMM_LEN]; \ + \ + get_task_comm(comm, current); \ + printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \ + task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \ + } while (0) \ + +#define coredump_report(fmt, ...) __COREDUMP_PRINTK(KERN_INFO, fmt, ##__VA_ARGS__) +#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) + #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} + +#define coredump_report(...) +#define coredump_report_failure(...) + #endif #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL) -- cgit v1.2.3 From fb97d2eb542faf19a8725afbd75cbc2518903210 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 18 Jul 2024 11:27:25 -0700 Subject: binfmt_elf, coredump: Log the reason of the failed core dumps Missing, failed, or corrupted core dumps might impede crash investigations. To improve reliability of that process and consequently the programs themselves, one needs to trace the path from producing a core dumpfile to analyzing it. That path starts from the core dump file written to the disk by the kernel or to the standard input of a user mode helper program to which the kernel streams the coredump contents. There are cases where the kernel will interrupt writing the core out or produce a truncated/not-well-formed core dump without leaving a note. Add logging for the core dump collection failure paths to be able to reason what has gone wrong when the core dump is malformed or missing. Report the size of the data written to aid in diagnosing the user mode helper. 
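For illustration, a sketch of how a dump path could use the logging helpers introduced above when a write fails (the call site and the variables are assumed; only the coredump_report*() macros and dump_user_range() come from these hunks):

    /* cprm, start and len are assumed locals of the dumping code */
    if (!dump_user_range(cprm, start, len)) {
            coredump_report_failure("failed dumping %lu bytes at %#lx",
                                    len, start);
            return 0;
    }
    coredump_report("core dump written, %lld bytes", cprm->written);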
Signed-off-by: Roman Kisel Link: https://lore.kernel.org/r/20240718182743.1959160-3-romank@linux.microsoft.com Signed-off-by: Kees Cook --- include/linux/coredump.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 45e598fe3476..edeb8532ce0f 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern void do_coredump(const kernel_siginfo_t *siginfo); +extern int do_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -62,7 +62,11 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline void do_coredump(const kernel_siginfo_t *siginfo) {} +static inline int do_coredump(const kernel_siginfo_t *siginfo) +{ + /* Coredump support is not available, can't fail. */ + return 0; +} #define coredump_report(...) #define coredump_report_failure(...) -- cgit v1.2.3 From 0079c9d1e58a39148e6ce13bda55307ea6aa3a9e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2024 09:00:23 +0200 Subject: ALSA: ump: Handle MIDI 1.0 Function Block in MIDI 2.0 protocol The UMP v1.1 spec says in the section 6.2.1: "If a UMP Endpoint declares MIDI 2.0 Protocol but a Function Block represents a MIDI 1.0 connection, then may optionally be used for messages to/from that Function Block." It implies that the driver can (and should) keep MIDI 1.0 CVM exceptionally for those FBs even if UMP Endpoint is running in MIDI 2.0 protocol, and the current driver lacks of it. This patch extends the sequencer port info to indicate a MIDI 1.0 port, and tries to send/receive MIDI 1.0 CVM as is when this port is the source or sink. The sequencer port flag is set by the driver at parsing FBs and GTBs although application can set it to its own user-space clients, too. 
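For illustration, a minimal user-space sketch of a client opting its own sequencer port into MIDI 1.0 handling with the flag added below (the surrounding client and port setup is assumed):

    struct snd_seq_port_info pinfo = {};

    /* client number, port name and capabilities filled in as usual */
    pinfo.flags |= SNDRV_SEQ_PORT_FLG_IS_MIDI1;     /* keep MIDI 1.0 CVM */
    ioctl(seq_fd, SNDRV_SEQ_IOCTL_CREATE_PORT, &pinfo);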
Link: https://patch.msgid.link/20240806070024.14301-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump.h | 1 + include/uapi/sound/asequencer.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 7f68056acdff..7484f62fb234 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -18,6 +18,7 @@ struct snd_ump_group { unsigned int dir_bits; /* directions */ bool active; /* activeness */ bool valid; /* valid group (referred by blocks) */ + bool is_midi1; /* belongs to a MIDI1 FB */ char name[64]; /* group name */ }; diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h index 39b37edcf813..bc30c1f2a109 100644 --- a/include/uapi/sound/asequencer.h +++ b/include/uapi/sound/asequencer.h @@ -461,6 +461,8 @@ struct snd_seq_remove_events { #define SNDRV_SEQ_PORT_FLG_TIMESTAMP (1<<1) #define SNDRV_SEQ_PORT_FLG_TIME_REAL (1<<2) +#define SNDRV_SEQ_PORT_FLG_IS_MIDI1 (1<<3) /* Keep MIDI 1.0 protocol */ + /* port direction */ #define SNDRV_SEQ_PORT_DIR_UNKNOWN 0 #define SNDRV_SEQ_PORT_DIR_INPUT 1 -- cgit v1.2.3 From 7424fc6b86c8980a87169e005f5cd4438d18efe6 Mon Sep 17 00:00:00 2001 From: Gatlin Newhouse Date: Wed, 24 Jul 2024 00:01:55 +0000 Subject: x86/traps: Enable UBSAN traps on x86 Currently ARM64 extracts which specific sanitizer has caused a trap via encoded data in the trap instruction. Clang on x86 currently encodes the same data in the UD1 instruction but x86 handle_bug() and is_valid_bugaddr() currently only look at UD2. Bring x86 to parity with ARM64, similar to commit 25b84002afb9 ("arm64: Support Clang UBSAN trap codes for better reporting"). See the llvm links for information about the code generation. Enable the reporting of UBSAN sanitizer details on x86 compiled with clang when CONFIG_UBSAN_TRAP=y by analysing UD1 and retrieving the type immediate which is encoded by the compiler after the UD1. [ tglx: Simplified it by moving the printk() into handle_bug() ] Signed-off-by: Gatlin Newhouse Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Kees Cook Link: https://lore.kernel.org/all/20240724000206.451425-1-gatlin.newhouse@gmail.com Link: https://github.com/llvm/llvm-project/commit/c5978f42ec8e9#diff-bb68d7cd885f41cfc35843998b0f9f534adb60b415f647109e597ce448e92d9f Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86InstrSystem.td#L27 --- include/linux/ubsan.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index bff7445498de..d8219cbe09ff 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -4,6 +4,11 @@ #ifdef CONFIG_UBSAN_TRAP const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +#else +static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From 7d2fb3812acde0a76e0d361877e8295db065f9f4 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 6 Aug 2024 00:00:57 +0000 Subject: ASoC: remove bespoke trigger support Bespoke trigger support was added when Linux v3.5 by this patch. commit 07bf84aaf736781a283b1bd36eaa911453b14574 ("ASoC: dpcm: Add bespoke trigger()") test-component driver is using it, but this is because it indicates used function for debug/trace purpose. Except test-component driver, bespoke trigger has never been used over 10 years in upstream. 
We can re-support it if needed in the future, but let's remove it for now, because it just noise in upstream. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87v80ewmdi.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 5 +---- include/sound/soc-dpcm.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index bbb72ad4c951..ab4e109fe71d 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -240,8 +240,6 @@ int snd_soc_pcm_dai_new(struct snd_soc_pcm_runtime *rtd); int snd_soc_pcm_dai_prepare(struct snd_pcm_substream *substream); int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, int cmd, int rollback); -int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream, - int cmd); void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream, snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); @@ -345,8 +343,7 @@ struct snd_soc_dai_ops { */ int (*trigger)(struct snd_pcm_substream *, int, struct snd_soc_dai *); - int (*bespoke_trigger)(struct snd_pcm_substream *, int, - struct snd_soc_dai *); + /* * For hardware based FIFO caused delay reporting. * Optional. diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index ebd24753dd00..773f2db8c31c 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -58,7 +58,6 @@ enum snd_soc_dpcm_state { enum snd_soc_dpcm_trigger { SND_SOC_DPCM_TRIGGER_PRE = 0, SND_SOC_DPCM_TRIGGER_POST, - SND_SOC_DPCM_TRIGGER_BESPOKE, }; /* -- cgit v1.2.3 From db65eb46de135338d6177f8853e0fd208f19d63e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. 
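For illustration, a hedged driver-side sketch of the extended trim call (the allocation, alignment policy and variables below are assumed; only the new prototype comes from this patch):

    LIST_HEAD(blocks);
    u64 new_start, new_size;    /* new_size: assumed target size */
    int err;

    /* blocks previously allocated with drm_buddy_alloc_blocks() */
    new_start = round_up(start, alignment);    /* driver-specific alignment */
    err = drm_buddy_block_trim(&mm, &new_start, new_size, &blocks);
    if (err)
            return err;    /* or keep the untrimmed allocation */

Passing DRM_BUDDY_TRIM_DISABLE at allocation time skips the allocator's own trimming so that the driver can perform a trim like the above itself.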
v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- include/drm/drm_buddy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); -- cgit v1.2.3 From 90c36325c796cc7111329607a649c34f4979c78e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:06 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_CLOSE for active reset Introducing a new type TCP_ABORT_ON_CLOSE for tcp reset reason to handle the case where more data is unread in closing phase. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 2575c85d7f7a..fa6bfd0d7d69 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -17,6 +17,7 @@ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ + FN(TCP_ABORT_ON_CLOSE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -84,6 +85,11 @@ enum sk_rst_reason { * an error, send a reset" */ SK_RST_REASON_INVALID_SYN, + /** + * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close + * corresponding to LINUX_MIB_TCPABORTONCLOSE + */ + SK_RST_REASON_TCP_ABORT_ON_CLOSE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From edc92b48abc5b21c98eca5d05b98a560d7df2e4d Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:07 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_LINGER for active reset Introducing a new type TCP_ABORT_ON_LINGER for tcp reset reason to handle negative linger value case. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fa6bfd0d7d69..fbbaeb969e6a 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -18,6 +18,7 @@ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ + FN(TCP_ABORT_ON_LINGER) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -90,6 +91,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONCLOSE */ SK_RST_REASON_TCP_ABORT_ON_CLOSE, + /** + * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger + * corresponding to LINUX_MIB_TCPABORTONLINGER + */ + SK_RST_REASON_TCP_ABORT_ON_LINGER, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to -- cgit v1.2.3 From 8407994f0c3594eb3854e3799af86224f4a8e6e6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:08 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_MEMORY for active reset Introducing a new type TCP_ABORT_ON_MEMORY for tcp reset reason to handle out of memory case. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fbbaeb969e6a..eef658da8952 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -19,6 +19,7 @@ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ + FN(TCP_ABORT_ON_MEMORY) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -96,6 +97,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONLINGER */ SK_RST_REASON_TCP_ABORT_ON_LINGER, + /** + * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory + * corresponding to LINUX_MIB_TCPABORTONMEMORY + */ + SK_RST_REASON_TCP_ABORT_ON_MEMORY, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From edefba66d929eb2d023df93a0a8175a4ffe82684 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:09 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_STATE for active reset Introducing a new type TCP_STATE to handle some reset conditions appearing in RFC 793 due to its socket state. Actually, we can look into RFC 9293 which has no discrepancy about this part. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index eef658da8952..bbf20d0bbde7 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -20,6 +20,7 @@ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ + FN(TCP_STATE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -102,6 +103,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONMEMORY */ SK_RST_REASON_TCP_ABORT_ON_MEMORY, + /** + * @SK_RST_REASON_TCP_STATE: abort on tcp state + * Please see RFC 9293 for all possible reset conditions + */ + SK_RST_REASON_TCP_STATE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From 0a399892a596055c8f069a17b4775fe5ab66d32a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:10 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT for active reset Introducing this to show the users the reason of keepalive timeout. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/rstreason.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index bbf20d0bbde7..9c0c46df0e73 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -21,6 +21,7 @@ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ + FN(TCP_KEEPALIVE_TIMEOUT) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -108,6 +109,12 @@ enum sk_rst_reason { * Please see RFC 9293 for all possible reset conditions */ SK_RST_REASON_TCP_STATE, + /** + * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout + * When we have already run out of all the chances, which means + * keepalive timeout, we have to reset the connection + */ + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From c026c6562f86b24dd2dfef501fb1e64cc3884a79 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:11 +0800 Subject: tcp: rstreason: introduce SK_RST_REASON_TCP_DISCONNECT_WITH_DATA for active reset When user tries to disconnect a socket and there are more data written into tcp write queue, we should tell users about this reset reason. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 9c0c46df0e73..69cb2e52b7da 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -22,6 +22,7 @@ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ FN(TCP_KEEPALIVE_TIMEOUT) \ + FN(TCP_DISCONNECT_WITH_DATA) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -115,6 +116,13 @@ enum sk_rst_reason { * keepalive timeout, we have to reset the connection */ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, + /** + * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write + * queue is not empty + * It means user has written data into the write queue when doing + * disconnecting, so we have to send an RST. + */ + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to -- cgit v1.2.3 From ebaa86c0bddd2c47c516bf2096b17c0bed71d914 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 11:22:59 +0200 Subject: ALSA: usb-audio: Update UMP group attributes for GTB blocks, too When a FB is created from a GTB instead of UMP FB Info inquiry, we missed the update of the corresponding UMP Group attributes. Export the call of updater and let it be called from the USB driver. 
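For illustration, the assumed call site on the USB driver side once Function Blocks have been created from GTBs (sketch only):

    /* ump is the driver's struct snd_ump_endpoint *, FB list already filled */
    snd_ump_update_group_attrs(ump);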
Fixes: 0642a3c5cacc ("ALSA: ump: Update substream name from assigned FB names") Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807092303.1935-5-tiwai@suse.de --- include/sound/ump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/ump.h b/include/sound/ump.h index 7484f62fb234..532c2c3ea28e 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -123,6 +123,7 @@ static inline int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, int snd_ump_receive_ump_val(struct snd_ump_endpoint *ump, u32 val); int snd_ump_switch_protocol(struct snd_ump_endpoint *ump, unsigned int protocol); +void snd_ump_update_group_attrs(struct snd_ump_endpoint *ump); /* * Some definitions for UMP -- cgit v1.2.3 From 97edbc02b2efdb0cd0f507b6a45fc509acc861bb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 14:51:11 -0400 Subject: buffer: Convert block_write_end() to take a folio All callers now have a folio, so pass it in instead of converting from a folio to a page and back to a folio again. Saves a call to compound_head(). Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 14acf1bbe0ce..3a3fec154536 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -262,8 +262,8 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); -- cgit v1.2.3 From a225800f322a3d6cc8b8b6c7dc4d5281f2f5375b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 15:45:32 -0400 Subject: fs: Convert aops->write_end to take a folio Most callers have a folio, and most implementations operate on a folio, so remove the conversion from folio->page->folio to fit through this interface. 
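For illustration, what a block-based filesystem's ->write_end() looks like after this conversion (hypothetical "foo" filesystem; the real converted callers live outside these hunks):

    static int foo_write_end(struct file *file, struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned copied,
                             struct folio *folio, void *fsdata)
    {
            /* generic_write_end() now takes the folio directly as well */
            return generic_write_end(file, mapping, pos, len, copied,
                                     folio, fsdata);
    }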
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3a3fec154536..165e859664a5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -265,8 +265,8 @@ int block_write_end(struct file *, struct address_space *, loff_t, unsigned len, unsigned copied, struct folio *, void *); int generic_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct page **, void **, diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..1af3373c7cd2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -411,7 +411,7 @@ struct address_space_operations { struct page **pagep, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + struct folio *folio, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); -- cgit v1.2.3 From 1da86618bdce301d23e89ecce92161f9d3b3c5e7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 15 Jul 2024 14:24:01 -0400 Subject: fs: Convert aops->write_begin to take a folio Convert all callers from working on a page to working on one page of a folio (support for working on an entire folio can come later). Removes a lot of folio->page->folio conversions. 
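Complementing the ->write_end() sketch above, a hypothetical ->write_begin() after this conversion ("foo" and foo_get_block are assumed):

    static int foo_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len,
                               struct folio **foliop, void **fsdata)
    {
            return block_write_begin(mapping, pos, len, foliop,
                                     foo_get_block);
    }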
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 165e859664a5..254563a2a9de 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -258,7 +258,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, get_block_t *get_block); + struct folio **foliop, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, @@ -269,7 +269,7 @@ int generic_write_end(struct file *, struct address_space *, struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); int cont_write_begin(struct file *, struct address_space *, loff_t, - unsigned, struct page **, void **, + unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); void block_commit_write(struct page *page, unsigned int from, unsigned int to); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1af3373c7cd2..7a79b88a8f5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -408,7 +408,7 @@ struct address_space_operations { int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata); @@ -3331,7 +3331,7 @@ extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -- cgit v1.2.3 From 9f04609f74ec7a439e1ac42da5db9e6ddf4f7b13 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Jul 2024 23:09:04 -0400 Subject: buffer: Convert __block_write_begin() to take a folio Almost all callers have a folio now, so change __block_write_begin() to take a folio and remove a call to compound_head(). 
Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 254563a2a9de..c85e65fd4cdc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -259,7 +259,7 @@ int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block); -int __block_write_begin(struct page *page, loff_t pos, unsigned len, +int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, loff_t, unsigned len, unsigned copied, -- cgit v1.2.3 From 07a83512fa5be3f3d46369eee6352102fd9fb505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:08 +0100 Subject: usb: typec: tcpci: fix a comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit autonously -> autonomously Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-1-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 0ab39b6ea205..d27fe0c22a8b 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -190,7 +190,7 @@ struct tcpci; * Optional; Callback to perform chip specific operations when FRS * is sourcing vbus. * @auto_discharge_disconnect: - * Optional; Enables TCPC to autonously discharge vbus on disconnect. + * Optional; Enables TCPC to autonomously discharge vbus on disconnect. * @vbus_vsafe0v: * optional; Set when TCPC can detect whether vbus is at VSAFE0V. * @set_partner_usb_comm_capable: -- cgit v1.2.3 From f523aa6d72598bcd2946c7e02d29f33f94806a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:10 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_CC_STATUS_CC[12] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing code here, particularly in maxim_contaminant.c, is arguably quite hard to read due to the open-coded masking and shifting spanning multiple lines. Use GENMASK() and FIELD_GET() instead, which arguably make the code much easier to follow. While at it, use the symbolic name TCPC_CC_STATE_SRC_OPEN for one instance of open-coded 0x0. 
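For readers less used to the bitfield helpers, the throwaway userspace snippet below mimics what GENMASK()/FIELD_GET() from <linux/bits.h> and <linux/bitfield.h> do with the new definitions; the two macros here are simplified stand-ins, not the kernel implementations:

	#include <stdio.h>

	#define GENMASK(h, l)		(((1u << ((h) - (l) + 1)) - 1) << (l))
	#define FIELD_GET(mask, reg)	(((reg) & (mask)) >> __builtin_ctz(mask))

	#define TCPC_CC_STATUS_CC2	GENMASK(3, 2)
	#define TCPC_CC_STATUS_CC1	GENMASK(1, 0)

	int main(void)
	{
		unsigned int cc_status = 0x09;	/* CC2 = 0b10, CC1 = 0b01 */

		printf("cc1=%u cc2=%u\n",
		       FIELD_GET(TCPC_CC_STATUS_CC1, cc_status),
		       FIELD_GET(TCPC_CC_STATUS_CC2, cc_status));
		return 0;
	}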
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-3-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index d27fe0c22a8b..31d21ccf662b 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -92,11 +92,9 @@ #define TCPC_CC_STATUS_TERM BIT(4) #define TCPC_CC_STATUS_TERM_RP 0 #define TCPC_CC_STATUS_TERM_RD 1 +#define TCPC_CC_STATUS_CC2 GENMASK(3, 2) +#define TCPC_CC_STATUS_CC1 GENMASK(1, 0) #define TCPC_CC_STATE_SRC_OPEN 0 -#define TCPC_CC_STATUS_CC2_SHIFT 2 -#define TCPC_CC_STATUS_CC2_MASK 0x3 -#define TCPC_CC_STATUS_CC1_SHIFT 0 -#define TCPC_CC_STATUS_CC1_MASK 0x3 #define TCPC_POWER_STATUS 0x1e #define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7) @@ -256,7 +254,7 @@ static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink) if (sink) return TYPEC_CC_RP_3_0; fallthrough; - case 0x0: + case TCPC_CC_STATE_SRC_OPEN: default: return TYPEC_CC_OPEN; } -- cgit v1.2.3 From 83b254c13ac093810e1058d9cb1bbb4b7ef9c246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:11 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_ROLE_CTRL_CC[12] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All this open-coded shifting and masking is quite hard to read, in particular the if-statement in tcpci_apply_rc(). Declare TCPC_ROLE_CTRL_CC[12] using GENMASK() which allows using FIELD_GET() and FIELD_PREP() to arguably make the code more legible. Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-4-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 31d21ccf662b..552d074429f0 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -68,10 +68,8 @@ #define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 #define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 #define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 -#define TCPC_ROLE_CTRL_CC2_SHIFT 2 -#define TCPC_ROLE_CTRL_CC2_MASK 0x3 -#define TCPC_ROLE_CTRL_CC1_SHIFT 0 -#define TCPC_ROLE_CTRL_CC1_MASK 0x3 +#define TCPC_ROLE_CTRL_CC2 GENMASK(3, 2) +#define TCPC_ROLE_CTRL_CC1 GENMASK(1, 0) #define TCPC_ROLE_CTRL_CC_RA 0x0 #define TCPC_ROLE_CTRL_CC_RP 0x1 #define TCPC_ROLE_CTRL_CC_RD 0x2 @@ -176,8 +174,7 @@ #define tcpc_presenting_rd(reg, cc) \ (!(TCPC_ROLE_CTRL_DRP & (reg)) && \ - (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \ - (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT))) + FIELD_GET(TCPC_ROLE_CTRL_## cc, reg) == TCPC_ROLE_CTRL_CC_RD) struct tcpci; -- cgit v1.2.3 From 46b1e0f87ba89f0a06ffbde5fe8d13c5af93f94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:12 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_ROLE_CTRL_RP_VAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the last remaining field TCPC_ROLE_CTRL_RP_VAL with the other fields in the TCPC_ROLE_CTRL register by using GENMASK() and FIELD_PREP(). 
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-5-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 552d074429f0..80652d4f722e 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -63,8 +63,7 @@ #define TCPC_ROLE_CTRL 0x1a #define TCPC_ROLE_CTRL_DRP BIT(6) -#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4 -#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3 +#define TCPC_ROLE_CTRL_RP_VAL GENMASK(5, 4) #define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 #define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 #define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 -- cgit v1.2.3 From aee4568f42e0d7526207a10944bb89bb45522b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:13 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_MSG_HDR_INFO_REV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert field TCPC_MSG_HDR_INFO_REV from register TCPC_MSG_HDR_INFO to using GENMASK() and FIELD_PREP() so as to keep using a similar approach for all fields. Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-6-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 80652d4f722e..3cd61e9f73b3 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -129,9 +129,8 @@ #define TCPC_MSG_HDR_INFO 0x2e #define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3) +#define TCPC_MSG_HDR_INFO_REV GENMASK(2, 1) #define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0) -#define TCPC_MSG_HDR_INFO_REV_SHIFT 1 -#define TCPC_MSG_HDR_INFO_REV_MASK 0x3 #define TCPC_RX_DETECT 0x2f #define TCPC_RX_DETECT_HARD_RESET BIT(5) -- cgit v1.2.3 From 7cd41974d2c81055f61ba9f69e31c02e866716e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 10 Jul 2024 11:36:14 +0100 Subject: usb: typec: tcpci: use GENMASK() for TCPC_TRANSMIT register fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all fields from register TCPC_TRANSMIT to using GENMASK() and FIELD_PREP() so as to keep using a similar approach throughout the code base and make it arguably easier to read. 
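The write direction is symmetric: FIELD_PREP() shifts a value into its mask. A simplified userspace sketch, again with stand-in macros and purely illustrative field values:

	#include <stdio.h>

	#define GENMASK(h, l)		(((1u << ((h) - (l) + 1)) - 1) << (l))
	#define FIELD_PREP(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))

	#define TCPC_TRANSMIT_RETRY	GENMASK(5, 4)
	#define TCPC_TRANSMIT_TYPE	GENMASK(2, 0)

	int main(void)
	{
		unsigned int reg = FIELD_PREP(TCPC_TRANSMIT_RETRY, 3) |
				   FIELD_PREP(TCPC_TRANSMIT_TYPE, 0);

		printf("TCPC_TRANSMIT = 0x%02x\n", reg);	/* prints 0x30 */
		return 0;
	}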
Signed-off-by: André Draszik Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240710-tcpc-cleanup-v1-7-0ec1f41f4263@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 3cd61e9f73b3..f7f5cfbdef12 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -148,10 +148,8 @@ #define TCPC_RX_DATA 0x34 /* through 0x4f */ #define TCPC_TRANSMIT 0x50 -#define TCPC_TRANSMIT_RETRY_SHIFT 4 -#define TCPC_TRANSMIT_RETRY_MASK 0x3 -#define TCPC_TRANSMIT_TYPE_SHIFT 0 -#define TCPC_TRANSMIT_TYPE_MASK 0x7 +#define TCPC_TRANSMIT_RETRY GENMASK(5, 4) +#define TCPC_TRANSMIT_TYPE GENMASK(2, 0) #define TCPC_TX_BYTE_CNT 0x51 #define TCPC_TX_HDR 0x52 -- cgit v1.2.3 From 08d6fd691bdd5d6e7881ce42ca6774ed92b10896 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Thu, 1 Aug 2024 11:00:03 +0530 Subject: usb: gadget: Increase max configuration interface to 32 Currently, max configuration interfaces are limited to 16, which fails for compositions containing 10 UVC configurations with interrupt ep disabled along with other configurations , and we see bind failures while allocating interface ID in uvc bind. Increase max configuration interface to 32 to support any large compositions limited to same size as usb device endpoints as interfaces cannot be more than endpoints. Signed-off-by: Akash Kumar Link: https://lore.kernel.org/r/20240801053003.15153-1-quic_akakum@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index af3cd2aae4bc..6e38fb9d2117 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -256,7 +256,7 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep); int usb_func_wakeup(struct usb_function *func); -#define MAX_CONFIG_INTERFACES 16 /* arbitrary; max 255 */ +#define MAX_CONFIG_INTERFACES 32 /** * struct usb_configuration - represents one gadget configuration -- cgit v1.2.3 From 259b46204885431f2853afa2a2bffa63f3705e67 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 5 Aug 2024 12:20:37 +0200 Subject: serial: remove quot_frac from serial8250_do_set_divisor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit quot_frac is unused in serial8250_do_set_divisor() since commit b2b4b8ed3c06 (serial: 8250_exar: Move custom divisor support out from 8250_port). So no point to pass it. 
Signed-off-by: Jiri Slaby (SUSE) Cc: Ilpo Järvinen Cc: Andy Shevchenko Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240805102046.307511-5-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index fd59ed2cca53..e0717c8393d7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -193,7 +193,7 @@ void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, - unsigned int quot, unsigned int quot_frac); + unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); -- cgit v1.2.3 From 3dc73106ffc47640e692b9b32ebfd59d776c07fd Mon Sep 17 00:00:00 2001 From: Varshini Rajendran Date: Mon, 29 Jul 2024 12:38:03 +0530 Subject: dt-bindings: clock: at91: Allow PLLs to be exported and referenced in DT Allow PLLADIV2 and LVDSPLL to be referenced as a PMC_TYPE_CORE clock from phandle in DT for sam9x7 SoC family. Signed-off-by: Varshini Rajendran Acked-by: Rob Herring Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20240729070803.1990916-1-varshini.rajendran@microchip.com Signed-off-by: Claudiu Beznea --- include/dt-bindings/clock/at91.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 3e3972a814c1..6ede88c3992d 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -38,6 +38,10 @@ #define PMC_CPU (PMC_MAIN + 9) #define PMC_MCK1 (PMC_MAIN + 10) +/* SAM9X7 */ +#define PMC_PLLADIV2 (PMC_MAIN + 11) +#define PMC_LVDSPLL (PMC_MAIN + 12) + #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ #define AT91_PMC_LOCKA 1 /* PLLA Lock */ -- cgit v1.2.3 From 130fd056dd82b02db9a661c013071af35309be1a Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:16 +0100 Subject: sched/rt: Clean up usage of rt_task() rt_task() checks if a task has RT priority. But depends on your dictionary, this could mean it belongs to RT class, or is a 'realtime' task, which includes RT and DL classes. Since this has caused some confusion already on discussion [1], it seemed a clean up is due. I define the usage of rt_task() to be tasks that belong to RT class. Make sure that it returns true only for RT class and audit the users and replace the ones required the old behavior with the new realtime_task() which returns true for RT and DL classes. Introduce similar realtime_prio() to create similar distinction to rt_prio() and update the users that required the old behavior to use the new function. Move MAX_DL_PRIO to prio.h so it can be used in the new definitions. Document the functions to make it more obvious what is the difference between them. PI-boosted tasks is a factor that must be taken into account when choosing which function to use. Rename task_is_realtime() to realtime_task_policy() as the old name is confusing against the new realtime_task(). No functional changes were intended. 
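The split is easiest to see from the priority ranges themselves. The following throwaway userspace model only mirrors the comparisons introduced here and is not kernel code:

	#include <stdio.h>
	#include <stdbool.h>

	#define MAX_DL_PRIO	0
	#define MAX_RT_PRIO	100

	static bool dl_prio(int prio)       { return prio < MAX_DL_PRIO; }
	static bool rt_prio(int prio)       { return prio >= MAX_DL_PRIO && prio < MAX_RT_PRIO; }
	static bool realtime_prio(int prio) { return prio < MAX_RT_PRIO; }

	int main(void)
	{
		int prio[] = { -1, 0, 99, 120 };	/* DL, highest RT, lowest RT, a CFS level */
		int i;

		for (i = 0; i < 4; i++)
			printf("prio %4d: dl=%d rt=%d realtime=%d\n", prio[i],
			       dl_prio(prio[i]), rt_prio(prio[i]), realtime_prio(prio[i]));
		return 0;
	}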
[1] https://lore.kernel.org/lkml/20240506100509.GL40213@noisy.programming.kicks-ass.net/ Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Phil Auld Reviewed-by: "Steven Rostedt (Google)" Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240610192018.1567075-2-qyousef@layalina.io --- include/linux/ioprio.h | 2 +- include/linux/sched/deadline.h | 6 ++++-- include/linux/sched/prio.h | 1 + include/linux/sched/rt.h | 27 ++++++++++++++++++++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index db1249cd9692..75859b78d540 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task_is_realtime(task)) + else if (realtime_task_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index df3aca89d4f5..5cb88b748ad6 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,8 +10,6 @@ #include -#define MAX_DL_PRIO 0 - static inline int dl_prio(int prio) { if (unlikely(prio < MAX_DL_PRIO)) @@ -19,6 +17,10 @@ static inline int dl_prio(int prio) return 0; } +/* + * Returns true if a task has a priority that belongs to DL class. PI-boosted + * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. + */ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index ab83d85e1183..6ab43b4f72f9 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -14,6 +14,7 @@ */ #define MAX_RT_PRIO 100 +#define MAX_DL_PRIO 0 #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index b2b9e6eb9683..a055dd68a77c 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -7,18 +7,43 @@ struct task_struct; static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO)) + return 1; + return 0; +} + +static inline int realtime_prio(int prio) { if (unlikely(prio < MAX_RT_PRIO)) return 1; return 0; } +/* + * Returns true if a task has a priority that belongs to RT class. PI-boosted + * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. + */ static inline int rt_task(struct task_struct *p) { return rt_prio(p->prio); } -static inline bool task_is_realtime(struct task_struct *tsk) +/* + * Returns true if a task has a priority that belongs to RT or DL classes. + * PI-boosted tasks will return true. Use realtime_task_policy() to ignore + * PI-boosted tasks. + */ +static inline int realtime_task(struct task_struct *p) +{ + return realtime_prio(p->prio); +} + +/* + * Returns true if a task has a policy that belongs to RT or DL classes. + * PI-boosted tasks will return false. + */ +static inline bool realtime_task_policy(struct task_struct *tsk) { int policy = tsk->policy; -- cgit v1.2.3 From b166af3db70fdcecf125662a2360471bb20be203 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:17 +0100 Subject: sched/rt, dl: Convert functions to return bool {rt, realtime, dl}_{task, prio}() functions' return value is actually a bool. Convert their return type to reflect that. 
Suggested-by: "Steven Rostedt (Google)" Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: "Steven Rostedt (Google)" Reviewed-by: Metin Kaya Link: https://lore.kernel.org/r/20240610192018.1567075-3-qyousef@layalina.io --- include/linux/sched/deadline.h | 8 +++----- include/linux/sched/rt.h | 16 ++++++---------- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 5cb88b748ad6..3a912ab42bb5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,18 +10,16 @@ #include -static inline int dl_prio(int prio) +static inline bool dl_prio(int prio) { - if (unlikely(prio < MAX_DL_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_DL_PRIO); } /* * Returns true if a task has a priority that belongs to DL class. PI-boosted * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. */ -static inline int dl_task(struct task_struct *p) +static inline bool dl_task(struct task_struct *p) { return dl_prio(p->prio); } diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a055dd68a77c..91ef1ef2019f 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -6,25 +6,21 @@ struct task_struct; -static inline int rt_prio(int prio) +static inline bool rt_prio(int prio) { - if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO); } -static inline int realtime_prio(int prio) +static inline bool realtime_prio(int prio) { - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_RT_PRIO); } /* * Returns true if a task has a priority that belongs to RT class. PI-boosted * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. */ -static inline int rt_task(struct task_struct *p) +static inline bool rt_task(struct task_struct *p) { return rt_prio(p->prio); } @@ -34,7 +30,7 @@ static inline int rt_task(struct task_struct *p) * PI-boosted tasks will return true. Use realtime_task_policy() to ignore * PI-boosted tasks. */ -static inline int realtime_task(struct task_struct *p) +static inline bool realtime_task(struct task_struct *p) { return realtime_prio(p->prio); } -- cgit v1.2.3 From ae04f69de0bef93c7086cf2983dbc8e8fd624ebe Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 10 Jun 2024 20:20:18 +0100 Subject: sched/rt: Rename realtime_{prio, task}() to rt_or_dl_{prio, task}() Some find the name realtime overloaded. Use rt_or_dl() as an alternative, hopefully better, name. 
Suggested-by: Daniel Bristot de Oliveira Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240610192018.1567075-4-qyousef@layalina.io --- include/linux/ioprio.h | 2 +- include/linux/sched/rt.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 75859b78d540..b25377b6ea98 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (realtime_task_policy(task)) + else if (rt_or_dl_task_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 91ef1ef2019f..4e3338103654 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -11,7 +11,7 @@ static inline bool rt_prio(int prio) return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO); } -static inline bool realtime_prio(int prio) +static inline bool rt_or_dl_prio(int prio) { return unlikely(prio < MAX_RT_PRIO); } @@ -27,19 +27,19 @@ static inline bool rt_task(struct task_struct *p) /* * Returns true if a task has a priority that belongs to RT or DL classes. - * PI-boosted tasks will return true. Use realtime_task_policy() to ignore + * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore * PI-boosted tasks. */ -static inline bool realtime_task(struct task_struct *p) +static inline bool rt_or_dl_task(struct task_struct *p) { - return realtime_prio(p->prio); + return rt_or_dl_prio(p->prio); } /* * Returns true if a task has a policy that belongs to RT or DL classes. * PI-boosted tasks will return false. */ -static inline bool realtime_task_policy(struct task_struct *tsk) +static inline bool rt_or_dl_task_policy(struct task_struct *tsk) { int policy = tsk->policy; -- cgit v1.2.3 From 6e2fdceffdc6bd7b8ba314a1d1b976721533c8f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Jul 2024 14:42:08 -0400 Subject: tracing: Use refcount for trace_event_file reference counter Instead of using an atomic counter for the trace_event_file reference counter, use the refcount interface. It has various checks to make sure the reference counting is correct, and will warn if it detects an error (like refcount_inc() on '0'). Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240726144208.687cce24@rorschach.local.home Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626..fed58e54f15e 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. 
Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; -- cgit v1.2.3 From d507ae0dc83b7f43cdf6760b8f1a30aac4fc405a Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit db65eb46de135338d6177f8853e0fd208f19d63e) --- include/drm/drm_buddy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); -- cgit v1.2.3 From c2c0b67dca3cb3b3cea0dd60075a1c5ba77e2fcd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Aug 2024 17:02:32 +0200 Subject: ASoC: tas2781-i2c: Drop weird GPIO code The tas2781-i2c driver gets an IRQ from either ACPI or device tree, then proceeds to check if the IRQ has a corresponding GPIO and in case it does enforce the GPIO as input and set a label on it. This is abuse of the API: - First we cannot guarantee that the numberspaces of the GPIOs and the IRQs are the same, i.e that an IRQ number corresponds to a GPIO number like that. - Second, GPIO chips and IRQ chips should be treated as orthogonal APIs, the irqchip needs to ascertain that the backing GPIO line is set to input etc just using the irqchip. - Third it is using the legacy API which should not be used in new code yet this was added just a year ago. Delete the offending code. If this creates problems the GPIO and irqchip maintainers can help to fix the issues. It *should* not create any problems, because the irq isn't used anywhere in the driver, it's just obtained and then left unused. 
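Should a consumer for the interrupt appear later, the conventional pattern would be roughly the sketch below; the handler, label and flags are assumptions for illustration, not code proposed here:

	static int example_i2c_probe(struct i2c_client *client)
	{
		int irq = client->irq;	/* mapped from ACPI/DT by the I2C core */
		int ret;

		if (irq > 0) {
			ret = devm_request_threaded_irq(&client->dev, irq,
							NULL, example_irq_thread,
							IRQF_ONESHOT, "tas2781",
							NULL);
			if (ret)
				return ret;
		}
		return 0;
	}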
Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20240807-asoc-tas-gpios-v2-1-bd0f2705d58b@linaro.org Signed-off-by: Mark Brown --- include/sound/tas2781.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 18161d02a96f..dbda552398b5 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -81,11 +81,6 @@ struct tasdevice { bool is_loaderr; }; -struct tasdevice_irqinfo { - int irq_gpio; - int irq; -}; - struct calidata { unsigned char *data; unsigned long total_sz; @@ -93,7 +88,6 @@ struct calidata { struct tasdevice_priv { struct tasdevice tasdevice[TASDEVICE_MAX_CHANNELS]; - struct tasdevice_irqinfo irq_info; struct tasdevice_rca rcabin; struct calidata cali_data; struct tasdevice_fw *fmw; @@ -115,6 +109,7 @@ struct tasdevice_priv { unsigned int chip_id; unsigned int sysclk; + int irq; int cur_prog; int cur_conf; int fw_state; -- cgit v1.2.3 From 58f7e4d7ba32758b861807e77535853cacc1f426 Mon Sep 17 00:00:00 2001 From: Jianhui Zhou <912460177@qq.com> Date: Mon, 5 Aug 2024 19:36:31 +0800 Subject: ring-buffer: Remove unused function ring_buffer_nr_pages() Because ring_buffer_nr_pages() is not an inline function and user accesses buffer->buffers[cpu]->nr_pages directly, the function ring_buffer_nr_pages is removed. Signed-off-by: Jianhui Zhou <912460177@qq.com> Link: https://lore.kernel.org/tencent_F4A7E9AB337F44E0F4B858D07D19EF460708@qq.com Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471e..fd35d4ec12e1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; -- cgit v1.2.3 From b426b3ba9f6fe17990893c5324727dd217d420b6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:00 +0200 Subject: ALSA: vx_core: Drop unused dev field The vx_core.dev field has never been set but referred incorrectly at firmware loading. Pass the proper device pointer from card->dev at request_firmware(), and drop the unused dev field from vx_core, too. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-11-tiwai@suse.de --- include/sound/vx_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/vx_core.h b/include/sound/vx_core.h index 1ddd3036bdfc..ca87fa6a8135 100644 --- a/include/sound/vx_core.h +++ b/include/sound/vx_core.h @@ -155,7 +155,6 @@ struct vx_core { unsigned int chip_status; unsigned int pcm_running; - struct device *dev; struct snd_hwdep *hwdep; struct vx_rmh irq_rmh; /* RMH used in interrupts */ -- cgit v1.2.3 From 7f7eff209ee283eec9ae56e9c1446dd1ac3e1022 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:14 +0200 Subject: ALSA: es1688: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. 
For referring to the device, introduce snd_card pointer to struct snd_es1688. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-25-tiwai@suse.de --- include/sound/es1688.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/es1688.h b/include/sound/es1688.h index 099569c31fbb..425a3717d77a 100644 --- a/include/sound/es1688.h +++ b/include/sound/es1688.h @@ -17,6 +17,7 @@ #define ES1688_HW_UNDEF 0x0003 struct snd_es1688 { + struct snd_card *card; unsigned long port; /* port of ESS chip */ struct resource *res_port; unsigned long mpu_port; /* MPU-401 port of ESS chip */ -- cgit v1.2.3 From 40b15de3c4f23994f53f930e94939c7b6a0cc52f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:19 +0200 Subject: ALSA: opti9xx: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. The card pointer is stored in struct snd_opti9xx and snd_miro to be referred for dev_*() calls. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-30-tiwai@suse.de --- include/sound/aci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/aci.h b/include/sound/aci.h index 6ebbd4223f12..36a761c9820d 100644 --- a/include/sound/aci.h +++ b/include/sound/aci.h @@ -72,6 +72,7 @@ #define ACI_SET_EQ7 0x46 /* ... to Treble */ struct snd_miro_aci { + struct snd_card *card; unsigned long aci_port; int aci_vendor; int aci_product; -- cgit v1.2.3 From 8b4ac5429938dd5f1fbf2eea0687f08cbcccb6be Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:25 +0200 Subject: ALSA: wavefront: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-36-tiwai@suse.de --- include/sound/snd_wavefront.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h index 55053557c898..27f7e8a477c2 100644 --- a/include/sound/snd_wavefront.h +++ b/include/sound/snd_wavefront.h @@ -137,8 +137,4 @@ extern int snd_wavefront_fx_ioctl (struct snd_hwdep *, extern int snd_wavefront_fx_open (struct snd_hwdep *, struct file *); extern int snd_wavefront_fx_release (struct snd_hwdep *, struct file *); -/* prefix in all snd_printk() delivered messages */ - -#define LOGNAME "WaveFront: " - #endif /* __SOUND_SND_WAVEFRONT_H__ */ -- cgit v1.2.3 From fea1510719dd2a33e0ffa1eae6118c5437f7071d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:33 +0200 Subject: ALSA: emux: Use standard print API Use the standard print API with dev_*() instead of the old house-baked one. It gives better information and allows dynamically control of debug prints. Some functions are changed to receive snd_card object for calling dev_*() functions, too. 
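The shape of these conversions is roughly the following (hypothetical example, not a hunk from this patch):

	/* before */
	snd_printd("load_data: patch %d failed\n", patch);
	/* after: the snd_card pointer passed in provides the device for dev_*() */
	dev_dbg(card->dev, "load_data: patch %d failed\n", patch);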
Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-44-tiwai@suse.de --- include/sound/soundfont.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soundfont.h b/include/sound/soundfont.h index 98ed98d89d6d..8a40cc15f66d 100644 --- a/include/sound/soundfont.h +++ b/include/sound/soundfont.h @@ -86,9 +86,11 @@ struct snd_sf_list { }; /* Prototypes for soundfont.c */ -int snd_soundfont_load(struct snd_sf_list *sflist, const void __user *data, +int snd_soundfont_load(struct snd_card *card, + struct snd_sf_list *sflist, const void __user *data, long count, int client); -int snd_soundfont_load_guspatch(struct snd_sf_list *sflist, const char __user *data, +int snd_soundfont_load_guspatch(struct snd_card *card, + struct snd_sf_list *sflist, const char __user *data, long count); int snd_soundfont_close_check(struct snd_sf_list *sflist, int client); -- cgit v1.2.3 From 504dc9f5e62e3beef2554c453576e84993053647 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 15:34:44 +0200 Subject: ALSA: core: Drop snd_print stuff and co Now that all users of snd_print*() are gone, let's drop the functions completely. This also makes CONFIG_SND_VERBOSE_PRINTK redundant, and it's dropped, too. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807133452.9424-55-tiwai@suse.de --- include/sound/core.h | 67 ---------------------------------------------------- 1 file changed, 67 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index dfef0c9d4b9f..23401aaf3dc2 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -345,45 +345,7 @@ void release_and_free_resource(struct resource *res); /* --- */ -/* sound printk debug levels */ -enum { - SND_PR_ALWAYS, - SND_PR_DEBUG, - SND_PR_VERBOSE, -}; - -#if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) -__printf(4, 5) -void __snd_printk(unsigned int level, const char *file, int line, - const char *format, ...); -#else -#define __snd_printk(level, file, line, format, ...) \ - printk(format, ##__VA_ARGS__) -#endif - -/** - * snd_printk - printk wrapper - * @fmt: format string - * - * Works like printk() but prints the file and the line of the caller - * when configured with CONFIG_SND_VERBOSE_PRINTK. - */ -#define snd_printk(fmt, ...) \ - __snd_printk(0, __FILE__, __LINE__, fmt, ##__VA_ARGS__) - #ifdef CONFIG_SND_DEBUG -/** - * snd_printd - debug printk - * @fmt: format string - * - * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG is not set. - */ -#define snd_printd(fmt, ...) \ - __snd_printk(1, __FILE__, __LINE__, fmt, ##__VA_ARGS__) -#define _snd_printd(level, fmt, ...) \ - __snd_printk(level, __FILE__, __LINE__, fmt, ##__VA_ARGS__) - /** * snd_BUG - give a BUG warning message and stack trace * @@ -392,12 +354,6 @@ void __snd_printk(unsigned int level, const char *file, int line, */ #define snd_BUG() WARN(1, "BUG?\n") -/** - * snd_printd_ratelimit - Suppress high rates of output when - * CONFIG_SND_DEBUG is enabled. - */ -#define snd_printd_ratelimit() printk_ratelimit() - /** * snd_BUG_ON - debugging check macro * @cond: condition to evaluate @@ -409,11 +365,6 @@ void __snd_printk(unsigned int level, const char *file, int line, #else /* !CONFIG_SND_DEBUG */ -__printf(1, 2) -static inline void snd_printd(const char *format, ...) 
{} -__printf(2, 3) -static inline void _snd_printd(int level, const char *format, ...) {} - #define snd_BUG() do { } while (0) #define snd_BUG_ON(condition) ({ \ @@ -421,26 +372,8 @@ static inline void _snd_printd(int level, const char *format, ...) {} unlikely(__ret_warn_on); \ }) -static inline bool snd_printd_ratelimit(void) { return false; } - #endif /* CONFIG_SND_DEBUG */ -#ifdef CONFIG_SND_DEBUG_VERBOSE -/** - * snd_printdd - debug printk - * @format: format string - * - * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. - */ -#define snd_printdd(format, ...) \ - __snd_printk(2, __FILE__, __LINE__, format, ##__VA_ARGS__) -#else -__printf(1, 2) -static inline void snd_printdd(const char *format, ...) {} -#endif - - #define SNDRV_OSS_VERSION ((3<<16)|(8<<8)|(1<<4)|(0)) /* 3.8.1a */ /* for easier backward-porting */ -- cgit v1.2.3 From c772a2c690182410642ead740f7a84b3a7544b2b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:10 +0300 Subject: net/mlx5: Add IFC related stuff for data direct Add IFC related stuff for data direct. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/82da7f578a567909bb5858a64ba844fe4cc298fa.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 51 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..970c9d8473ef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -313,6 +313,7 @@ enum { MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, + MLX5_CMD_OPCODE_QUERY_VUID = 0xb22, MLX5_CMD_OP_MAX }; @@ -1885,7 +1886,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5a0[0x10]; u8 enhanced_cqe_compression[0x1]; - u8 reserved_at_5b1[0x2]; + u8 reserved_at_5b1[0x1]; + u8 crossing_vhca_mkey[0x1]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -1954,7 +1956,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 dynamic_msix_table_size[0xc]; u8 reserved_at_740[0xc]; u8 min_dynamic_vf_msix_table_size[0x4]; - u8 reserved_at_750[0x4]; + u8 reserved_at_750[0x2]; + u8 data_direct[0x1]; + u8 reserved_at_753[0x1]; u8 max_dynamic_vf_msix_table_size[0xc]; u8 reserved_at_760[0x3]; @@ -1982,7 +1986,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; u8 migratable[0x1]; - u8 reserved_at_81[0x1f]; + u8 reserved_at_81[0x11]; + u8 query_vuid[0x1]; + u8 reserved_at_93[0xd]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -4154,6 +4160,7 @@ enum { MLX5_MKC_ACCESS_MODE_KSM = 0x3, MLX5_MKC_ACCESS_MODE_SW_ICM = 0x4, MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, + MLX5_MKC_ACCESS_MODE_CROSSING = 0x6, }; struct mlx5_ifc_mkc_bits { @@ -4196,7 +4203,10 @@ struct mlx5_ifc_mkc_bits { u8 bsf_octword_size[0x20]; - u8 reserved_at_120[0x80]; + u8 reserved_at_120[0x60]; + + u8 crossing_target_vhca_id[0x10]; + u8 reserved_at_190[0x10]; u8 translations_octword_size[0x20]; @@ -5124,6 +5134,36 @@ struct mlx5_ifc_query_vport_state_out_bits { u8 state[0x4]; }; +struct mlx5_ifc_array1024_auto_bits { + u8 array1024_auto[32][0x20]; +}; + +struct mlx5_ifc_query_vuid_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x40]; + + u8 query_vfs_vuid[0x1]; + u8 data_direct[0x1]; + u8 reserved_at_62[0xe]; + u8 vhca_id[0x10]; +}; + +struct mlx5_ifc_query_vuid_out_bits { + u8 
status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1a0]; + + u8 reserved_at_1e0[0x10]; + u8 num_of_entries[0x10]; + + struct mlx5_ifc_array1024_auto_bits vuid[]; +}; + enum { MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, @@ -8989,7 +9029,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 pg_access[0x1]; u8 mkey_umem_valid[0x1]; - u8 reserved_at_62[0x1e]; + u8 data_direct[0x1]; + u8 reserved_at_63[0x1d]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; -- cgit v1.2.3 From abf3a3ea9acb5c886c8729191a670744ecd42024 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:44 +0200 Subject: dt-bindings: clock: exynos7885: Fix duplicated binding The numbering in Exynos7885's FSYS CMU bindings has 4 duplicated by accident, with the rest of the bindings continuing with 5. Fix this by moving CLK_MOUT_FSYS_USB30DRD_USER to the end as 11. Since CLK_MOUT_FSYS_USB30DRD_USER is not used in any device tree as of now, and there are no other clocks affected (maybe apart from CLK_MOUT_FSYS_MMC_SDIO_USER which the number was shared with, also not used in a device tree), this is the least impactful way to solve this problem. Fixes: cd268e309c29 ("dt-bindings: clock: Add bindings for Exynos7885 CMU_FSYS") Cc: stable@vger.kernel.org Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-2-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 255e3aa94323..54cfccff8508 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -136,12 +136,12 @@ #define CLK_MOUT_FSYS_MMC_CARD_USER 2 #define CLK_MOUT_FSYS_MMC_EMBD_USER 3 #define CLK_MOUT_FSYS_MMC_SDIO_USER 4 -#define CLK_MOUT_FSYS_USB30DRD_USER 4 #define CLK_GOUT_MMC_CARD_ACLK 5 #define CLK_GOUT_MMC_CARD_SDCLKIN 6 #define CLK_GOUT_MMC_EMBD_ACLK 7 #define CLK_GOUT_MMC_EMBD_SDCLKIN 8 #define CLK_GOUT_MMC_SDIO_ACLK 9 #define CLK_GOUT_MMC_SDIO_SDCLKIN 10 +#define CLK_MOUT_FSYS_USB30DRD_USER 11 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */ -- cgit v1.2.3 From 59baa83e30f82b74b4c7dc07c20eac9899b6c0c6 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:45 +0200 Subject: dt-bindings: clock: exynos7885: Add CMU_TOP PLL MUX indices Add indices for missing MUX clocks from PLLs in CMU_TOP. 
Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-3-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 54cfccff8508..4ce86810b10d 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -69,6 +69,8 @@ #define CLK_GOUT_FSYS_MMC_EMBD 58 #define CLK_GOUT_FSYS_MMC_SDIO 59 #define CLK_GOUT_FSYS_USB30DRD 60 +#define CLK_MOUT_SHARED0_PLL 61 +#define CLK_MOUT_SHARED1_PLL 62 /* CMU_CORE */ #define CLK_MOUT_CORE_BUS_USER 1 -- cgit v1.2.3 From b9dee49cc6f9efa97eee059d03b704dec0f45658 Mon Sep 17 00:00:00 2001 From: David Virag Date: Tue, 6 Aug 2024 14:11:46 +0200 Subject: dt-bindings: clock: exynos7885: Add indices for USB clocks Exynos7885 SoC has a DWC3 USB Controller with Exynos USB PHY which in theory supports USB3 SuperSpeed, but is only used as USB2 in all known devices. These, of course, need some clocks. Add indices for these clocks. Signed-off-by: David Virag Link: https://lore.kernel.org/r/20240806121157.479212-4-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/exynos7885.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h index 4ce86810b10d..cfede84b46b9 100644 --- a/include/dt-bindings/clock/exynos7885.h +++ b/include/dt-bindings/clock/exynos7885.h @@ -134,16 +134,24 @@ #define CLK_GOUT_WDT1_PCLK 43 /* CMU_FSYS */ -#define CLK_MOUT_FSYS_BUS_USER 1 -#define CLK_MOUT_FSYS_MMC_CARD_USER 2 -#define CLK_MOUT_FSYS_MMC_EMBD_USER 3 -#define CLK_MOUT_FSYS_MMC_SDIO_USER 4 -#define CLK_GOUT_MMC_CARD_ACLK 5 -#define CLK_GOUT_MMC_CARD_SDCLKIN 6 -#define CLK_GOUT_MMC_EMBD_ACLK 7 -#define CLK_GOUT_MMC_EMBD_SDCLKIN 8 -#define CLK_GOUT_MMC_SDIO_ACLK 9 -#define CLK_GOUT_MMC_SDIO_SDCLKIN 10 -#define CLK_MOUT_FSYS_USB30DRD_USER 11 +#define CLK_MOUT_FSYS_BUS_USER 1 +#define CLK_MOUT_FSYS_MMC_CARD_USER 2 +#define CLK_MOUT_FSYS_MMC_EMBD_USER 3 +#define CLK_MOUT_FSYS_MMC_SDIO_USER 4 +#define CLK_GOUT_MMC_CARD_ACLK 5 +#define CLK_GOUT_MMC_CARD_SDCLKIN 6 +#define CLK_GOUT_MMC_EMBD_ACLK 7 +#define CLK_GOUT_MMC_EMBD_SDCLKIN 8 +#define CLK_GOUT_MMC_SDIO_ACLK 9 +#define CLK_GOUT_MMC_SDIO_SDCLKIN 10 +#define CLK_MOUT_FSYS_USB30DRD_USER 11 +#define CLK_MOUT_USB_PLL 12 +#define CLK_FOUT_USB_PLL 13 +#define CLK_FSYS_USB20PHY_CLKCORE 14 +#define CLK_FSYS_USB30DRD_ACLK_20PHYCTRL 15 +#define CLK_FSYS_USB30DRD_ACLK_30PHYCTRL_0 16 +#define CLK_FSYS_USB30DRD_ACLK_30PHYCTRL_1 17 +#define CLK_FSYS_USB30DRD_BUS_CLK_EARLY 18 +#define CLK_FSYS_USB30DRD_REF_CLK 19 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */ -- cgit v1.2.3 From 599f6899051cb70c4e0aa9fd591b9ee220cb6f14 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 7 Aug 2024 09:22:10 +0200 Subject: media: uapi/linux/cec.h: cec_msg_set_reply_to: zero flags The cec_msg_set_reply_to() helper function never zeroed the struct cec_msg flags field, this can cause unexpected behavior if flags was uninitialized to begin with. 
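A small userspace check of the corrected behaviour could look like this, assuming uapi headers that already carry the fix:

	#include <linux/cec.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct cec_msg orig, reply;

		memset(&orig, 0, sizeof(orig));
		orig.len = 2;
		orig.msg[0] = (CEC_LOG_ADDR_TV << 4) | CEC_LOG_ADDR_PLAYBACK_1;
		orig.msg[1] = CEC_MSG_GIVE_DEVICE_POWER_STATUS;

		memset(&reply, 0, sizeof(reply));
		reply.flags = CEC_MSG_FL_REPLY_TO_FOLLOWERS;	/* pretend leftover state */
		cec_msg_set_reply_to(&reply, &orig);
		printf("reply.flags = %u\n", reply.flags);	/* 0 with this fix */
		return 0;
	}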
Signed-off-by: Hans Verkuil Fixes: 0dbacebede1e ("[media] cec: move the CEC framework out of staging and to media") Cc: Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index 894fffc66f2c..b2af1dddd4d7 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -132,6 +132,8 @@ static inline void cec_msg_init(struct cec_msg *msg, * Set the msg destination to the orig initiator and the msg initiator to the * orig destination. Note that msg and orig may be the same pointer, in which * case the change is done in place. + * + * It also zeroes the reply, timeout and flags fields. */ static inline void cec_msg_set_reply_to(struct cec_msg *msg, struct cec_msg *orig) @@ -139,7 +141,9 @@ static inline void cec_msg_set_reply_to(struct cec_msg *msg, /* The destination becomes the initiator and vice versa */ msg->msg[0] = (cec_msg_destination(orig) << 4) | cec_msg_initiator(orig); - msg->reply = msg->timeout = 0; + msg->reply = 0; + msg->timeout = 0; + msg->flags = 0; } /** -- cgit v1.2.3 From a09cdb8f564613769142a60400bb5160864c3269 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:41:17 +0200 Subject: genirq: Remove unused irq_chip_generic:: {type,polarity}_cache The type_cache and polarity_cache members of struct irq_chip_generic are unused. Remove them both along with their kernel-doc. Found by https://github.com/jirislaby/clang-struct. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240808104118.430670-2-jirislaby@kernel.org --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1f5dbf1f92c9..00490d6ead65 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1040,8 +1040,6 @@ struct irq_chip_type { * @irq_base: Interrupt base nr for this chip * @irq_cnt: Number of interrupts handled by this chip * @mask_cache: Cached mask register shared between all chip types - * @type_cache: Cached type register - * @polarity_cache: Cached polarity register * @wake_enabled: Interrupt can wakeup from suspend * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) @@ -1068,8 +1066,6 @@ struct irq_chip_generic { unsigned int irq_base; unsigned int irq_cnt; u32 mask_cache; - u32 type_cache; - u32 polarity_cache; u32 wake_enabled; u32 wake_active; unsigned int num_ct; -- cgit v1.2.3 From 60029162a0458832ab2bcfc6fd4986bfd9ca0f55 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:41:18 +0200 Subject: genirq: Remove irq_chip_regs:: Polarity The polarity member of struct irq_chip_regs is unused. Remove it along with its kernel-doc. Found by https://github.com/jirislaby/clang-struct. 
Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240808104118.430670-3-jirislaby@kernel.org --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 00490d6ead65..fa711f80957b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -991,7 +991,6 @@ void irq_init_desc(unsigned int irq); * @ack: Ack register offset to reg_base * @eoi: Eoi register offset to reg_base * @type: Type configuration register offset to reg_base - * @polarity: Polarity configuration register offset to reg_base */ struct irq_chip_regs { unsigned long enable; @@ -1000,7 +999,6 @@ struct irq_chip_regs { unsigned long ack; unsigned long eoi; unsigned long type; - unsigned long polarity; }; /** -- cgit v1.2.3 From b54de55990b0467538c6bb33523b28816384958a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 7 Aug 2024 17:06:12 +0100 Subject: net: ethtool: fix off-by-one error in max RSS context IDs Both ethtool_ops.rxfh_max_context_id and the default value used when it's not specified are supposed to be exclusive maxima (the former is documented as such; the latter, U32_MAX, cannot be used as an ID since it equals ETH_RXFH_CONTEXT_ALLOC), but xa_alloc() expects an inclusive maximum. Subtract one from 'limit' to produce an inclusive maximum, and pass that to xa_alloc(). Increase bnxt's max by one to prevent a (very minor) regression, as BNXT_MAX_ETH_RSS_CTX is an inclusive max. This is safe since bnxt is not actually hard-limited; BNXT_MAX_ETH_RSS_CTX is just a leftover from old driver code that managed context IDs itself. Rename rxfh_max_context_id to rxfh_max_num_contexts to make its semantics (hopefully) more obvious. Fixes: 847a8ab18676 ("net: ethtool: let the core choose RSS context IDs") Signed-off-by: Edward Cree Link: https://patch.msgid.link/5a2d11a599aa5b0cc6141072c01accfb7758650c.1723045898.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17..989c94eddb2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. 
Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); -- cgit v1.2.3 From 5819e464a17587e6830cfab05f3e91a9a8753a41 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 8 Aug 2024 14:08:08 +1000 Subject: cpumask: Fix crash on updating CPU enabled mask The CPU enabled mask instead of the CPU possible mask should be used by set_cpu_enabled(). Otherwise, we run into crash due to write to the read-only CPU possible mask when vCPU is hot added on ARM64. (qemu) device_add host-arm-cpu,id=cpu1,socket-id=1 Unable to handle kernel write to read-only memory at virtual address ffff800080fa7190 : Call trace: register_cpu+0x1a4/0x2e8 arch_register_cpu+0x84/0xd8 acpi_processor_add+0x480/0x5b0 acpi_bus_attach+0x1c4/0x300 acpi_dev_for_one_check+0x3c/0x50 device_for_each_child+0x68/0xc8 acpi_dev_for_each_child+0x48/0x80 acpi_bus_attach+0x84/0x300 acpi_bus_scan+0x74/0x220 acpi_scan_rescan_bus+0x54/0x88 acpi_device_hotplug+0x208/0x478 acpi_hotplug_work_fn+0x2c/0x50 process_one_work+0x15c/0x3c0 worker_thread+0x2ec/0x400 kthread+0x120/0x130 ret_from_fork+0x10/0x20 Fix it by passing the CPU enabled mask instead of the CPU possible mask to set_cpu_enabled(). Fixes: 51c4767503d5 ("Merge tag 'bitmap-6.11-rc1' of https://github.com:/norov/linux") Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Signed-off-by: Yury Norov --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..53158de44b83 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1037,7 +1037,7 @@ void init_cpu_online(const struct cpumask *src); assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) -#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_possible_mask, (enabled)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) #define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) #define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) -- cgit v1.2.3 From 3882dccf48f9fbe787b5df3187d708ef348ac860 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 8 Aug 2024 16:05:57 +0100 Subject: bpf/bpf_get,set_sockopt: add option to set TCP-BPF sock ops flags Currently the only opportunity to set sock ops flags dictating which callbacks fire for a socket is from within a TCP-BPF sockops program. This is problematic if the connection is already set up as there is no further chance to specify callbacks for that socket. Add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_setsockopt() and bpf_getsockopt() to allow users to specify callbacks later, either via an iterator over sockets or via a socket-specific program triggered by a setsockopt() on the socket. Previous discussion on this here [1]. 
[1] https://lore.kernel.org/bpf/f42f157b-6e52-dd4d-3d97-9b86c84c0b00@oracle.com/ Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20240808150558.1035626-2-alan.maguire@oracle.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e05b39e39c3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { -- cgit v1.2.3 From 09612576046a3cd29bd2b2b88c5813b1dfe96775 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 7 Aug 2024 14:22:26 +0200 Subject: net: sungem_phy: Constify struct mii_phy_def 'struct mii_phy_def' are not modified in this driver. Constifying these structures moves some data to a read-only section, so increase overall security. While at it fix the checkpatch warning related to this patch (some missing newlines and spaces around *) On a x86_64, with allmodconfig: Before: ====== 27709 928 0 28637 6fdd drivers/net/sungem_phy.o After: ===== text data bss dec hex filename 28157 476 0 28633 6fd9 drivers/net/sungem_phy.o Signed-off-by: Christophe JAILLET Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/54c3b30930f80f4895e6fa2f4234714fdea4ef4e.1723033266.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/linux/sungem_phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index c505f30e8b68..eecc7eb63bfb 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -40,7 +40,7 @@ enum { /* An instance of a PHY, partially borrowed from mii_if_info */ struct mii_phy { - struct mii_phy_def* def; + const struct mii_phy_def *def; u32 advertising; int mii_id; -- cgit v1.2.3 From 5fcf329676cf9d132842d9871f83a091c32f3bfc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:50 +0200 Subject: fs: add put_mnt_ns() cleanup helper Add a simple helper to put a mount namespace reference. 
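DEFINE_FREE() from <linux/cleanup.h> enables scope-based cleanup, so a future caller could hold the reference as in the sketch below; the lookup function is made up for illustration:

	struct mnt_namespace *mnt_ns __free(put_mnt_ns) =
			lookup_some_mnt_ns(arg);	/* hypothetical getter returning a reference */

	if (IS_ERR_OR_NULL(mnt_ns))
		return -EINVAL;
	/* use mnt_ns; put_mnt_ns() runs automatically on every return path */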
Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-3-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/mnt_namespace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 8f882f5881e8..70b366b64816 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -3,6 +3,9 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ +#include +#include + struct mnt_namespace; struct fs_struct; struct user_namespace; @@ -11,6 +14,7 @@ struct ns_common; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); +DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) extern struct ns_common *from_mnt_ns(struct mnt_namespace *); extern const struct file_operations proc_mounts_operations; -- cgit v1.2.3 From 257b1c2c78c25643526609dee0c15f1544eb3252 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:51 +0200 Subject: file: add fput() cleanup helper Add a simple helper to put a file reference. Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-4-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/file.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..d1e768b06069 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -11,6 +11,7 @@ #include #include #include +#include struct file; @@ -96,6 +97,7 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +DEFINE_FREE(fput, struct file *, if (!IS_ERR_OR_NULL(_T)) fput(_T)) /* * take_fd() will take care to set @fd to -EBADF ensuring that -- cgit v1.2.3 From a1d220d9dafa8d76ba60a784a1016c3134e6a1e8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Jul 2024 13:41:52 +0200 Subject: nsfs: iterate through mount namespaces It is already possible to list mounts in other mount namespaces and to retrieve namespace file descriptors without having to go through procfs by deriving them from pidfds. Augment these abilities by adding the ability to retrieve information about a mount namespace via NS_MNT_GET_INFO. This will return the mount namespace id and the number of mounts currently in the mount namespace. The number of mounts can be used to size the buffer that needs to be used for listmount() and is in general useful without having to actually iterate through all the mounts. The structure is extensible. And add the ability to iterate through all mount namespaces over which the caller holds privilege returning the file descriptor for the next or previous mount namespace. To retrieve a mount namespace the caller must be privileged wrt to it's owning user namespace. This means that PID 1 on the host can list all mounts in all mount namespaces or that a container can list all mounts of its nested containers. Optionally pass a structure for NS_MNT_GET_INFO with NS_MNT_GET_{PREV,NEXT} to retrieve information about the mount namespace in one go. Both ioctls can be implemented for other namespace types easily. 
Together with recent api additions this means one can iterate through all mounts in all mount namespaces without ever touching procfs. Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-5-834113cab0d2@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..cb31af3c1840 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -26,4 +27,19 @@ /* Return thread-group leader id of pid in the target pid namespace. */ #define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From f428cc9eac6e29d57579be4978ba210c344322ea Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:29 +0200 Subject: ALSA: control: Rename ctl_files_rwlock to controls_rwlock We'll re-use the existing rwlock for the protection of control list lookup, too, and now rename it to a more generic name. This is a preliminary change, only the rename of the struct field here, no functional changes. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-2-tiwai@suse.de --- include/sound/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index dfef0c9d4b9f..55607e91d5fd 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -99,7 +99,7 @@ struct snd_card { struct device *ctl_dev; /* control device */ unsigned int last_numid; /* last used numeric ID */ struct rw_semaphore controls_rwsem; /* controls lock (list and values) */ - rwlock_t ctl_files_rwlock; /* ctl_files list lock */ + rwlock_t controls_rwlock; /* lock for ctl_files list */ int controls_count; /* count of all controls */ size_t user_ctl_alloc_size; // current memory allocation by user controls. struct list_head controls; /* all controls for this card */ -- cgit v1.2.3 From 38ea4c3dc306edf6f4e483e8ad9cb8d33943afde Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:30 +0200 Subject: ALSA: control: Optimize locking for look-up For a fast look-up of a control element via either numid or name matching (enabled via CONFIG_SND_CTL_FAST_LOOKUP), a locking isn't needed at all thanks to Xarray. OTOH, the locking is still needed for a slow linked-list traversal, and that's rather a rare case. In this patch, we reduce the use of locking at snd_ctl_find_*() API functions, and switch from controls_rwsem to controls_rwlock for avoiding unnecessary lock inversions. This also resulted in a nice cleanup, as *_unlocked() version of snd_ctl_find_*() APIs can be dropped. snd_ctl_find_id_mixer_unlocked() is still left just as an alias of snd_ctl_find_id_mixer(), since soc-card.c has a wrapper and there are several users. Once after converting there, we can remove it later. 
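As a rough illustration (not taken from this patch), a driver can now look up one of its controls by name and signal a value change without holding controls_rwsem around the lookup; the control name below is arbitrary, and the control is assumed not to be removed concurrently:

        struct snd_kcontrol *kctl;

        kctl = snd_ctl_find_id_mixer(card, "Master Playback Volume");
        if (kctl)
                snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id);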
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-3-tiwai@suse.de --- include/sound/control.h | 22 ++-------------------- include/sound/core.h | 2 +- 2 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index c1659036c4a7..4851a86c72ef 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -140,9 +140,7 @@ int snd_ctl_remove_id(struct snd_card * card, struct snd_ctl_elem_id *id); int snd_ctl_rename_id(struct snd_card * card, struct snd_ctl_elem_id *src_id, struct snd_ctl_elem_id *dst_id); void snd_ctl_rename(struct snd_card *card, struct snd_kcontrol *kctl, const char *name); int snd_ctl_activate_id(struct snd_card *card, struct snd_ctl_elem_id *id, int active); -struct snd_kcontrol *snd_ctl_find_numid_locked(struct snd_card *card, unsigned int numid); struct snd_kcontrol *snd_ctl_find_numid(struct snd_card *card, unsigned int numid); -struct snd_kcontrol *snd_ctl_find_id_locked(struct snd_card *card, const struct snd_ctl_elem_id *id); struct snd_kcontrol *snd_ctl_find_id(struct snd_card *card, const struct snd_ctl_elem_id *id); /** @@ -167,27 +165,11 @@ snd_ctl_find_id_mixer(struct snd_card *card, const char *name) return snd_ctl_find_id(card, &id); } -/** - * snd_ctl_find_id_mixer_locked - find the control instance with the given name string - * @card: the card instance - * @name: the name string - * - * Finds the control instance with the given name and - * @SNDRV_CTL_ELEM_IFACE_MIXER. Other fields are set to zero. - * - * This is merely a wrapper to snd_ctl_find_id_locked(). - * The caller must down card->controls_rwsem before calling this function. - * - * Return: The pointer of the instance if found, or %NULL if not. - */ +/* to be removed */ static inline struct snd_kcontrol * snd_ctl_find_id_mixer_locked(struct snd_card *card, const char *name) { - struct snd_ctl_elem_id id = {}; - - id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - strscpy(id.name, name, sizeof(id.name)); - return snd_ctl_find_id_locked(card, &id); + return snd_ctl_find_id_mixer(card, name); } int snd_ctl_create(struct snd_card *card); diff --git a/include/sound/core.h b/include/sound/core.h index 55607e91d5fd..5c418ab24aa4 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -99,7 +99,7 @@ struct snd_card { struct device *ctl_dev; /* control device */ unsigned int last_numid; /* last used numeric ID */ struct rw_semaphore controls_rwsem; /* controls lock (list and values) */ - rwlock_t controls_rwlock; /* lock for ctl_files list */ + rwlock_t controls_rwlock; /* lock for lookup and ctl_files list */ int controls_count; /* count of all controls */ size_t user_ctl_alloc_size; // current memory allocation by user controls. struct list_head controls; /* all controls for this card */ -- cgit v1.2.3 From 9cacb32a0ba691c2859a20bd5e30b71cc592fad2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2024 12:42:31 +0200 Subject: ASoC: Drop snd_soc_*_get_kcontrol_locked() The recent cleanup in ALSA control core made no difference between snd_ctl_find_id_mixer() and snd_ctl_find_id_mixer_locked(), and the latter is to be dropped. The only user of the left API was ASoC, and that's snd_soc_card_get_kcontrol_locked() and snd_soc_component_get_kcontrol_locked(). This patch drops those functions and rewrites those users to call the variant without locked instead. The test of the API became superfluous, hence dropped as well. 
As all callers of snd_ctl_find_id_mixer_locked() are gone, snd_ctl_find_id_mixer_locked() is finally dropped, too. Reviewed-by: Mark Brown Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240809104234.8488-4-tiwai@suse.de --- include/sound/control.h | 7 ------- include/sound/soc-card.h | 2 -- include/sound/soc-component.h | 3 --- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index 4851a86c72ef..e31bd8e36bfa 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -165,13 +165,6 @@ snd_ctl_find_id_mixer(struct snd_card *card, const char *name) return snd_ctl_find_id(card, &id); } -/* to be removed */ -static inline struct snd_kcontrol * -snd_ctl_find_id_mixer_locked(struct snd_card *card, const char *name) -{ - return snd_ctl_find_id_mixer(card, name); -} - int snd_ctl_create(struct snd_card *card); int snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn); diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h index 1f4c39922d82..ecc02e955279 100644 --- a/include/sound/soc-card.h +++ b/include/sound/soc-card.h @@ -30,8 +30,6 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card) struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, const char *name); -struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, - const char *name); int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type, struct snd_soc_jack *jack); int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id, diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index bf2e381cd124..61534ac0edd1 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -464,9 +464,6 @@ int snd_soc_component_force_enable_pin_unlocked( /* component controls */ struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, const char * const ctl); -struct snd_kcontrol * -snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, - const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); -- cgit v1.2.3 From 7ff7509fa52397455e04bd44982e9dfbbd19457f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 29 Jul 2024 11:36:26 +0200 Subject: PCI: Make pcim_request_region() a public function pcim_request_region() is the managed counterpart of pci_request_region(). It is currently only used internally for PCI. It can be useful for a number of drivers and exporting it is a step towards deprecating more complicated functions. Make pcim_request_region() a public function. 
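A minimal sketch of the intended use in a probe path (the my_* names are invented; only pcim_request_region() comes from this patch, the other calls are existing managed PCI helpers):

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        void __iomem *regs;
        int ret;

        ret = pcim_enable_device(pdev);
        if (ret)
                return ret;

        ret = pcim_request_region(pdev, 0, "my_driver");
        if (ret)
                return ret;

        regs = pcim_iomap(pdev, 0, 0);
        if (!regs)
                return -ENOMEM;

        /* the region and the mapping are released automatically on unbind */
        return 0;
}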
Link: https://lore.kernel.org/r/20240729093625.17561-4-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Tested-by: Hans de Goede Reviewed-by: Hans de Goede Acked-by: Hans de Goede --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..4246cb790c7b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2292,6 +2292,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); +int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, const char *name); -- cgit v1.2.3 From d140f80f603584d8282817994c0c6241e736bef4 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 7 Aug 2024 10:30:18 +0200 Subject: PCI: Deprecate pcim_iomap_regions() in favor of pcim_iomap_region() pcim_iomap_regions() is a complicated function that uses a bit mask to determine the BARs the user wishes to request and ioremap. Almost all users only ever set a single bit in that mask, making that mechanism questionable. pcim_iomap_region() is now available as a more simple replacement. Make pcim_iomap_region() a public function. Mark pcim_iomap_regions() as deprecated. Link: https://lore.kernel.org/r/20240807083018.8734-2-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4246cb790c7b..aa9ef7890c50 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2290,6 +2290,8 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); +void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, + const char *name); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 70114e7f7585ef078c2b7033ee14218f95f55e22 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 15:34:02 +0300 Subject: irqdomain: Simplify simple and legacy domain creation irq_domain_create_simple() and irq_domain_create_legacy() use __irq_domain_instantiate(), but have extra handling of allocating interrupt descriptors and associating interrupts in them. Some of that is duplicated. There are also call sites which have conditonals to invoke different interrupt domain creator functions, where one of them is usually irq_domain_create_legacy(). Alternatively they associate the interrupts for the legacy case after creating the domain. Moving the extra logic of irq_domain_create_simple()/legacy() into __irq_domain_instantiate() allows to consolidate that. Introduce hwirq_base and virq_base members in the irq_domain_info structure, which allows to transport the required information and add the conditional interrupt descriptor allocation and interrupt association into __irq_domain_instantiate(). 
This reduces irq_domain_create_legacy() and irq_domain_create_simple() to trivial wrappers which fill in the info structure and allows call sites which must support the legacy case along with more modern mechanism to select the domain type via the parameters of the info struct. [ tglx: Massaged change log ] Suggested-by: Thomas Gleixner Signed-off-by: Matti Vaittinen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/32d07bd79eb2b5416e24da9e9e8fe5955423dcf9.1723120028.git.mazziesaccount@gmail.com --- include/linux/irqdomain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index de6105f68fec..bfcffa2c7047 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -291,6 +291,9 @@ struct irq_domain_chip_generic_info; * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping + * @hwirq_base: The first hardware interrupt number (legacy domains only) + * @virq_base: The first Linux interrupt number for legacy domains to + * immediately associate the interrupts after domain creation * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer @@ -307,6 +310,8 @@ struct irq_domain_info { unsigned int size; irq_hw_number_t hwirq_max; int direct_max; + unsigned int hwirq_base; + unsigned int virq_base; enum irq_domain_bus_token bus_token; const struct irq_domain_ops *ops; void *host_data; -- cgit v1.2.3 From 1e7c05292531e5b6bebe409cd531ed4ec0b2ff56 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 22:23:06 +0200 Subject: irqdomain: Allow giving name suffix for domain Devices can provide multiple interrupt lines. One reason for this is that a device has multiple subfunctions, each providing its own interrupt line. Another reason is that a device can be designed to be used (also) on a system where some of the interrupts can be routed to another processor. A line often further acts as a demultiplex for specific interrupts and has it's respective set of interrupt (status, mask, ack, ...) registers. Regmap supports the handling of these registers and demultiplexing interrupts, but the interrupt domain code ends up assigning the same name for the per interrupt line domains. This causes a naming collision in the debugFS code and leads to confusion, as /proc/interrupts shows two separate interrupts with the same domain name and hardware interrupt number. Instead of adding a workaround in regmap or driver code, allow giving a name suffix for the domain name when the domain is created. Add a name_suffix field in the irq_domain_info structure and make irq_domain_instantiate() use this suffix if it is given when a domain is created. 
[ tglx: Adopt it to the cleanup patch and fixup the invalid NULL return ] Signed-off-by: Matti Vaittinen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/871q2yvk5x.ffs@tglx --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bfcffa2c7047..e432b6a12a32 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -295,6 +295,8 @@ struct irq_domain_chip_generic_info; * @virq_base: The first Linux interrupt number for legacy domains to * immediately associate the interrupts after domain creation * @bus_token: Domain bus token + * @name_suffix: Optional name suffix to avoid collisions when multiple + * domains are added using same fwnode * @ops: Domain operation callbacks * @host_data: Controller private data pointer * @dgc_info: Geneneric chip information structure pointer used to @@ -313,6 +315,7 @@ struct irq_domain_info { unsigned int hwirq_base; unsigned int virq_base; enum irq_domain_bus_token bus_token; + const char *name_suffix; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -- cgit v1.2.3 From da4fe6815aca25603944a64b0965310512e867d0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Aug 2024 14:09:00 +0800 Subject: Revert "lib/mpi: Introduce ec implementation to MPI library" This reverts commit d58bb7e55a8a65894cc02f27c3e2bf9403e7c40f. It's no longer needed since sm2 has been removed. Signed-off-by: Herbert Xu --- include/linux/mpi.h | 105 ---------------------------------------------------- 1 file changed, 105 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index eb0d1c1db208..89b720893e12 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -157,111 +157,6 @@ void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); /*-- mpi-inv.c --*/ int mpi_invm(MPI x, MPI a, MPI n); -/*-- ec.c --*/ - -/* Object to represent a point in projective coordinates */ -struct gcry_mpi_point { - MPI x; - MPI y; - MPI z; -}; - -typedef struct gcry_mpi_point *MPI_POINT; - -/* Models describing an elliptic curve */ -enum gcry_mpi_ec_models { - /* The Short Weierstrass equation is - * y^2 = x^3 + ax + b - */ - MPI_EC_WEIERSTRASS = 0, - /* The Montgomery equation is - * by^2 = x^3 + ax^2 + x - */ - MPI_EC_MONTGOMERY, - /* The Twisted Edwards equation is - * ax^2 + y^2 = 1 + bx^2y^2 - * Note that we use 'b' instead of the commonly used 'd'. - */ - MPI_EC_EDWARDS -}; - -/* Dialects used with elliptic curves */ -enum ecc_dialects { - ECC_DIALECT_STANDARD = 0, - ECC_DIALECT_ED25519, - ECC_DIALECT_SAFECURVE -}; - -/* This context is used with all our EC functions. */ -struct mpi_ec_ctx { - enum gcry_mpi_ec_models model; /* The model describing this curve. */ - enum ecc_dialects dialect; /* The ECC dialect used with the curve. */ - int flags; /* Public key flags (not always used). */ - unsigned int nbits; /* Number of bits. */ - - /* Domain parameters. Note that they may not all be set and if set - * the MPIs may be flagged as constant. - */ - MPI p; /* Prime specifying the field GF(p). */ - MPI a; /* First coefficient of the Weierstrass equation. */ - MPI b; /* Second coefficient of the Weierstrass equation. */ - MPI_POINT G; /* Base point (generator). */ - MPI n; /* Order of G. */ - unsigned int h; /* Cofactor. */ - - /* The actual key. May not be set. */ - MPI_POINT Q; /* Public key. */ - MPI d; /* Private key. */ - - const char *name; /* Name of the curve. 
*/ - - /* This structure is private to mpi/ec.c! */ - struct { - struct { - unsigned int a_is_pminus3:1; - unsigned int two_inv_p:1; - } valid; /* Flags to help setting the helper vars below. */ - - int a_is_pminus3; /* True if A = P - 3. */ - - MPI two_inv_p; - - mpi_barrett_t p_barrett; - - /* Scratch variables. */ - MPI scratch[11]; - - /* Helper for fast reduction. */ - /* int nist_nbits; /\* If this is a NIST curve, the # of bits. *\/ */ - /* MPI s[10]; */ - /* MPI c; */ - } t; - - /* Curve specific computation routines for the field. */ - void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec); - void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); - void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); -}; - -void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, - enum ecc_dialects dialect, - int flags, MPI p, MPI a, MPI b); -void mpi_ec_deinit(struct mpi_ec_ctx *ctx); -MPI_POINT mpi_point_new(unsigned int nbits); -void mpi_point_release(MPI_POINT p); -void mpi_point_init(MPI_POINT p); -void mpi_point_free_parts(MPI_POINT p); -int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx); -void mpi_ec_add_points(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx); -void mpi_ec_mul_point(MPI_POINT result, - MPI scalar, MPI_POINT point, - struct mpi_ec_ctx *ctx); -int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx); - /* inline functions */ /** -- cgit v1.2.3 From 72c0f57dbe8bf625108dc67e34f3472f28501776 Mon Sep 17 00:00:00 2001 From: Norman Bintang Date: Fri, 9 Aug 2024 22:06:45 +0800 Subject: ALSA: pcm: Add xrun counter for snd_pcm_substream This patch adds an xrun counter to snd_pcm_substream as an alternative to using logs from XRUN_DEBUG_BASIC. The counter provides a way to track the number of xrun occurences, accessible through the /proc interface. The counter is enabled when CONFIG_SND_PCM_XRUN_DEBUG is set. Example output: $ cat /proc/asound/card0/pcm9p/sub0/status owner_pid : 1425 trigger_time: 235.248957291 tstamp : 0.000000000 delay : 1912 avail : 480 avail_max : 1920 ----- hw_ptr : 672000 appl_ptr : 673440 xrun_counter: 3 # (new row) Signed-off-by: Norman Bintang Reviewed-by: Chih-Yang Hsia Tested-by: Chih-Yang Hsia Reviewed-by: David Riley Link: https://patch.msgid.link/20240809140648.3414349-1-normanbt@chromium.org Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index ac8f3aef9205..384032b6c59c 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -498,6 +498,9 @@ struct snd_pcm_substream { /* misc flags */ unsigned int hw_opened: 1; unsigned int managed_buffer_alloc:1; +#ifdef CONFIG_SND_PCM_XRUN_DEBUG + unsigned int xrun_counter; /* number of times xrun happens */ +#endif /* CONFIG_SND_PCM_XRUN_DEBUG */ }; #define SUBSTREAM_BUSY(substream) ((substream)->ref_count > 0) -- cgit v1.2.3 From 0737158aabc278526c784f63b8dd8963cdc558a9 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:31 +0200 Subject: iio: add read scale and offset services to iio backend framework Add iio_backend_read_scale() and iio_backend_read_offset() services to read channel scale and offset from an IIO backbend device. 
Also add a read_raw callback which replicates the read_raw callback of the IIO framework, and is intended to request miscellaneous channel attributes from the backend device. Both scale and offset helpers use this callback. Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-2-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 2b9d1aa86552..838092b0de3d 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -3,6 +3,7 @@ #define _IIO_BACKEND_H_ #include +#include struct iio_chan_spec; struct fwnode_handle; @@ -85,6 +86,7 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. **/ @@ -119,6 +121,9 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*read_raw)(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); int (*debugfs_print_chan_status)(struct iio_backend *back, unsigned int chan, char *buf, size_t len); @@ -162,7 +167,9 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); - +int iio_backend_read_raw(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); @@ -174,6 +181,21 @@ __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, int devm_iio_backend_register(struct device *dev, const struct iio_backend_info *info, void *priv); +static inline int iio_backend_read_scale(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, IIO_CHAN_INFO_SCALE); +} + +static inline int iio_backend_read_offset(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, + IIO_CHAN_INFO_OFFSET); +} + ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back, unsigned int chan, char *buf, size_t len); -- cgit v1.2.3 From 2530d7d44ca6dc0c1c059f143cdcb2be8600c59a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:32 +0200 Subject: iio: add enable and disable services to iio backend framework Add iio_backend_disable() and iio_backend_enable() APIs to allow IIO backend consumer to request backend disabling and enabling. 
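As a rough sketch of how a backend consumer ties these helpers together (not part of either patch; the my_adc names and state layout are invented, and the backend handle is assumed to come from devm_iio_backend_get()):

struct my_adc_state {
        struct iio_backend *back;
};

static int my_adc_read_raw(struct iio_dev *indio_dev,
                           struct iio_chan_spec const *chan,
                           int *val, int *val2, long mask)
{
        struct my_adc_state *st = iio_priv(indio_dev);

        switch (mask) {
        case IIO_CHAN_INFO_SCALE:
                /* pass through the backend's IIO_VAL_* return code */
                return iio_backend_read_scale(st->back, chan, val, val2);
        case IIO_CHAN_INFO_OFFSET:
                return iio_backend_read_offset(st->back, chan, val, val2);
        default:
                return -EINVAL;
        }
}

The non-devm iio_backend_enable()/iio_backend_disable() pair then lets such a consumer power the backend up and down at runtime instead of keeping it enabled for the whole device lifetime.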
Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-3-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 838092b0de3d..b0b663163ff4 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -144,6 +144,8 @@ struct iio_backend_info { int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_enable(struct iio_backend *back); +void iio_backend_disable(struct iio_backend *back); int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, const struct iio_backend_data_fmt *data); int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, -- cgit v1.2.3 From c464cc610f51f6eb5f27bf905d4c1ab1896b2725 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 30 Jul 2024 10:46:33 +0200 Subject: iio: add child nodes support in iio backend framework Add an API to support IIO generic channels binding: http://devicetree.org/schemas/iio/adc/adc.yaml# This new API is needed, as a generic channel DT node isn't populated as a device. Add devm_iio_backend_fwnode_get() to allow an IIO device backend consumer to reference backend phandles in its child nodes. Signed-off-by: Olivier Moysan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240730084640.1307938-4-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index b0b663163ff4..37d56914d485 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -176,6 +176,9 @@ int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend *devm_iio_backend_fwnode_get(struct device *dev, + const char *name, + struct fwnode_handle *fwnode); struct iio_backend * __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 2837efdc7cef5b86b0c4d94a7410bc03cc2f003f Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Wed, 7 Aug 2024 20:56:18 +0200 Subject: iio: trigger: allow devices to suspend/resume their associated trigger When a machine enters a sleep state while a trigger is associated with an iio device, that trigger is not resumed after the sleep state is exited. Provide iio device drivers with a way to suspend and resume the associated trigger to fix this. Each iio driver supporting external triggers is expected to call iio_device_suspend_triggering() before suspending and iio_device_resume_triggering() upon resuming.
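A minimal sketch of the expected wiring in a driver's PM callbacks (the my_sensor names are invented, and it is assumed the driver stored its iio_dev pointer as driver data):

static int my_sensor_suspend(struct device *dev)
{
        struct iio_dev *indio_dev = dev_get_drvdata(dev);

        return iio_device_suspend_triggering(indio_dev);
}

static int my_sensor_resume(struct device *dev)
{
        struct iio_dev *indio_dev = dev_get_drvdata(dev);

        return iio_device_resume_triggering(indio_dev);
}

static DEFINE_SIMPLE_DEV_PM_OPS(my_sensor_pm_ops, my_sensor_suspend,
                                my_sensor_resume);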
Signed-off-by: Denis Benato Link: https://patch.msgid.link/20240807185619.7261-2-benato.denis96@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 3a735a9a9eae..18779b631e90 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -810,6 +810,23 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) } #endif +/** + * iio_device_suspend_triggering() - suspend trigger attached to an iio_dev + * @indio_dev: iio_dev associated with the device that will have triggers suspended + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_suspend_triggering(struct iio_dev *indio_dev); + +/** + * iio_device_resume_triggering() - resume trigger attached to an iio_dev + * that was previously suspended with iio_device_suspend_triggering() + * @indio_dev: iio_dev associated with the device that will have triggers resumed + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_resume_triggering(struct iio_dev *indio_dev); + #ifdef CONFIG_ACPI bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, -- cgit v1.2.3 From bf66471987b4bd537bc800353ca7d39bfd1d1022 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:51:10 +0200 Subject: context_tracking, rcu: Rename struct context_tracking .dynticks_nesting into .nesting The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. [ neeraj.upadhyay: Fix htmldocs build error reported by Stephen Rothwell ] Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 6 +++--- include/trace/events/rcu.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ad6570ffeff3..65290e7677e6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -39,7 +39,7 @@ struct context_tracking { atomic_t state; #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE - long dynticks_nesting; /* Track process nesting level. */ + long nesting; /* Track process nesting level. */ long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -77,14 +77,14 @@ static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) static __always_inline long ct_dynticks_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nesting); + return __this_cpu_read(context_tracking.nesting); } static __always_inline long ct_dynticks_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nesting; + return ct->nesting; } static __always_inline long ct_dynticks_nmi_nesting(void) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 31b3e0d3e65f..4066b6d51e46 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -469,7 +469,7 @@ TRACE_EVENT(rcu_stall_warning, * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not * being in dyntick-idle mode. * context: "USER" or "IDLE" or "IRQ". - * NMIs nested in IRQs are inferred with dynticks_nesting > 1 in IRQ context. + * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. 
* * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is -- cgit v1.2.3 From 1089c0078b69bce0c2d540e3cc43470ba9e5dcfd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:45:30 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nesting() into ct_nesting() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 65290e7677e6..586c1ff22c2e 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -75,7 +75,7 @@ static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) return atomic_read_acquire(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline long ct_dynticks_nesting(void) +static __always_inline long ct_nesting(void) { return __this_cpu_read(context_tracking.nesting); } -- cgit v1.2.3 From bca9455da531a3c2520c28ff349d0dab4e471d28 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:48:26 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nesting_cpu() into ct_nesting_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 586c1ff22c2e..fd42d8120ac2 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -80,7 +80,7 @@ static __always_inline long ct_nesting(void) return __this_cpu_read(context_tracking.nesting); } -static __always_inline long ct_dynticks_nesting_cpu(int cpu) +static __always_inline long ct_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From dc5fface4b30508933b515a4985401c8c8f6bcfd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:52:03 +0200 Subject: context_tracking, rcu: Rename struct context_tracking .dynticks_nmi_nesting into .nmi_nesting The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. [ neeraj.upadhyay: Fix htmldocs build error reported by Stephen Rothwell ] Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index fd42d8120ac2..12d00adf29e1 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -40,7 +40,7 @@ struct context_tracking { #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE long nesting; /* Track process nesting level. 
*/ - long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ + long nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -89,14 +89,14 @@ static __always_inline long ct_nesting_cpu(int cpu) static __always_inline long ct_dynticks_nmi_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nmi_nesting); + return __this_cpu_read(context_tracking.nmi_nesting); } static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nmi_nesting; + return ct->nmi_nesting; } #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ -- cgit v1.2.3 From 8375cb260d7e43610934af63748d1a51201449f8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:46:14 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nmi_nesting() into ct_nmi_nesting() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 12d00adf29e1..8f32fe599c5c 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -87,7 +87,7 @@ static __always_inline long ct_nesting_cpu(int cpu) return ct->nesting; } -static __always_inline long ct_dynticks_nmi_nesting(void) +static __always_inline long ct_nmi_nesting(void) { return __this_cpu_read(context_tracking.nmi_nesting); } -- cgit v1.2.3 From 2ef2890b7a94fbbfa8fdf0a90499ae1df476b8e8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 14:49:13 +0200 Subject: context_tracking, rcu: Rename ct_dynticks_nmi_nesting_cpu() into ct_nmi_nesting_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 8f32fe599c5c..34fd504e53a8 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -92,7 +92,7 @@ static __always_inline long ct_nmi_nesting(void) return __this_cpu_read(context_tracking.nmi_nesting); } -static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) +static __always_inline long ct_nmi_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); -- cgit v1.2.3 From e1de438336222dbe27f2d4baa8c01074fcac2bef Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 29 Apr 2024 15:52:32 +0200 Subject: context_tracking, rcu: Rename DYNTICK_IRQ_NONIDLE into CT_NESTING_IRQ_NONIDLE The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, and the 'dynticks' prefix can be dropped without losing any meaning. 
Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 34fd504e53a8..0dbda59c9f37 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -7,7 +7,7 @@ #include /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ -#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) +#define CT_NESTING_IRQ_NONIDLE ((LONG_MAX / 2) + 1) enum ctx_state { CT_STATE_DISABLED = -1, /* returned by ct_state() if unknown */ -- cgit v1.2.3 From 682358fd35dece838e6ae2d9d6a69fc0b9a9d411 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:13 +0300 Subject: RDMA/umem: Add support for creating pinned DMABUF umem with a given dma device Add support for creating pinned DMABUF umem with a specified DMA device instead of the DMA device of the given IB device. This API will be utilized in the upcoming patches of the series when multiple path DMAs are implemented. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/038aad36a43797e5591b20ba81051fc5758124f9.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 565a85044541..de05268ed632 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -150,6 +150,11 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, size_t size, int fd, int access); +struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); @@ -196,6 +201,16 @@ ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { return -EOPNOTSUPP; -- cgit v1.2.3 From 253c61dc256b3e6be65657f78b4a8452163ce00f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:14 +0300 Subject: RDMA/umem: Introduce an option to revoke DMABUF umem Introduce an option to revoke DMABUF umem. This option will retain the umem allocation while revoking its DMA mapping. Furthermore, any subsequent attempts to map the pages should fail once the umem has been revoked. This functionality will be utilized in the upcoming patches in the series, where we aim to delay umem deallocation until the mkey deregistration. However, we must unmap its pages immediately. 
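A rough sketch of the intended two-stage teardown (the my_mr structure and function names are hypothetical; the real mlx5 flow added later in this series differs in detail):

/* On the revoke event: unmap the pages now, keep the umem allocated. */
static void my_revoke_mr(struct my_mr *mr)
{
        ib_umem_dmabuf_revoke(to_ib_umem_dmabuf(mr->umem));
}

/* At final MR deregistration: free the umem itself. */
static void my_destroy_mr(struct my_mr *mr)
{
        ib_umem_dmabuf_release(to_ib_umem_dmabuf(mr->umem));
}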
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/a38270f2fe4a194868ca2312f4c1c760e51bcbff.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index de05268ed632..7dc7b1cc71b5 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -38,6 +38,7 @@ struct ib_umem_dmabuf { unsigned long last_sg_trim; void *private; u8 pinned : 1; + u8 revoked : 1; }; static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) @@ -158,6 +159,7 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -217,6 +219,7 @@ static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) } static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {} #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ -- cgit v1.2.3 From 3aa73c6b795b9aaaf933f3c95495d85fc0de39e3 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:15 +0300 Subject: RDMA: Pass uverbs_attr_bundle as part of '.reg_user_mr_dmabuf' API Pass uverbs_attr_bundle as part of '.reg_user_mr_dmabuf' API instead of udata. This enables passing some new ioctl attributes to the drivers, as will be introduced in the next patches for mlx5 driver. Change the involved drivers accordingly. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/9a25b2fc02443f7c36c2d93499ae25252b6afd40.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6c5712ae559d..a1dcf812d787 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2476,7 +2476,7 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, u64 length, u64 virt_addr, int fd, int mr_access_flags, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, -- cgit v1.2.3 From de8f847a5114ff7cfcdfc114af8485c431dec703 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:16 +0300 Subject: RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. 
The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 5b74d6534899..106276a4cce7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -274,6 +274,10 @@ enum mlx5_ib_create_cq_attrs { MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW, }; +enum mlx5_ib_reg_dmabuf_mr_attrs { + MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT), +}; + #define MLX5_IB_DW_MATCH_PARAM 0xA0 struct mlx5_ib_match_params { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 3189c7f08d17..7c233df475e7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -54,6 +54,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; +enum mlx5_ib_uapi_reg_dmabuf_flags { + MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0, +}; + struct mlx5_ib_uapi_devx_async_cmd_hdr { __aligned_u64 wr_id; __u8 out_data[]; -- cgit v1.2.3 From ec7ad6530909983c8736c80af46e3529ce7bab55 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:17 +0300 Subject: RDMA/mlx5: Introduce GET_DATA_DIRECT_SYSFS_PATH ioctl Introduce the 'GET_DATA_DIRECT_SYSFS_PATH' ioctl to return the sysfs path of the affiliated 'data direct' device for a given device. 
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/403745463e0ef52adbef681ff09aa6a29a756352.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 106276a4cce7..fd2e4a3a56b3 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -348,6 +348,7 @@ enum mlx5_ib_pd_methods { enum mlx5_ib_device_methods { MLX5_IB_METHOD_QUERY_PORT = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH, }; enum mlx5_ib_query_port_attrs { @@ -355,4 +356,8 @@ enum mlx5_ib_query_port_attrs { MLX5_IB_ATTR_QUERY_PORT, }; +enum mlx5_ib_get_data_direct_sysfs_path_attrs { + MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif -- cgit v1.2.3 From ccb41c445a3e9506ef43fe33867a356048e41477 Mon Sep 17 00:00:00 2001 From: Kwanghoon Son Date: Fri, 9 Aug 2024 20:54:12 +0900 Subject: dt-bindings: clock: exynosautov9: add dpum clock Add dpum clock definitions and compatibles. Also used clock name 'bus' instead of full clock name dout_clkcmu_dpum_bus like other board cmu schema (GS101). Signed-off-by: Kwanghoon Son Link: https://lore.kernel.org/r/20240809-clk_dpum-v3-1-359decc30fe2@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov9.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov9.h b/include/dt-bindings/clock/samsung,exynosautov9.h index 3065375c2d8b..ce8fb8f7d718 100644 --- a/include/dt-bindings/clock/samsung,exynosautov9.h +++ b/include/dt-bindings/clock/samsung,exynosautov9.h @@ -179,6 +179,17 @@ #define CLK_GOUT_CORE_CCI_PCLK 4 #define CLK_GOUT_CORE_CMU_CORE_PCLK 5 +/* CMU_DPUM */ +#define CLK_MOUT_DPUM_BUS_USER 1 +#define CLK_DOUT_DPUM_BUSP 2 +#define CLK_GOUT_DPUM_ACLK_DECON 3 +#define CLK_GOUT_DPUM_ACLK_DMA 4 +#define CLK_GOUT_DPUM_ACLK_DPP 5 +#define CLK_GOUT_DPUM_SYSMMU_D0_CLK 6 +#define CLK_GOUT_DPUM_SYSMMU_D1_CLK 7 +#define CLK_GOUT_DPUM_SYSMMU_D2_CLK 8 +#define CLK_GOUT_DPUM_SYSMMU_D3_CLK 9 + /* CMU_FSYS0 */ #define CLK_MOUT_FSYS0_BUS_USER 1 #define CLK_MOUT_FSYS0_PCIE_USER 2 -- cgit v1.2.3 From aa9fbc5dd9da9e095a8b1a58540cf20cb06f595f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 8 Aug 2024 12:41:17 +0100 Subject: net: mii: constify advertising mask Constify the advertising mask to linkmode functions that only read from the advertising mask. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/mii.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index d5a959ce4877..b8f26d4513c3 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -140,7 +140,7 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_ADVERTISE register. */ -static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_mii_adv_t(const unsigned long *advertising) { u32 result = 0; @@ -215,7 +215,8 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_CTRL1000 register when in 1000T mode. 
*/ -static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising) +static inline u32 +linkmode_adv_to_mii_ctrl1000_t(const unsigned long *advertising) { u32 result = 0; @@ -453,7 +454,7 @@ static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising, * A small helper function that translates linkmode advertising to LVL * pause capabilities. */ -static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_lcl_adv_t(const unsigned long *advertising) { u32 lcl_adv = 0; -- cgit v1.2.3 From d0f8a8973f265f6a276f99d091af99edfb2b87de Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 8 Aug 2024 00:14:13 +0000 Subject: mm/memblock: introduce a new helper memblock_estimated_nr_free_pages() During bootup, system may need the number of free pages in the whole system to do some calculation before all pages are freed to buddy system. Usually this number is get from totalram_pages(). Since we plan to move the free pages accounting in __free_pages_core(), this value may not represent total free pages at the early stage, especially when CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled. Instead of using raw memblock api, let's introduce a new helper for user to get the estimated number of free pages from memblock point of view. Signed-off-by: Wei Yang CC: David Hildenbrand Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20240808001415.6298-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fc4d75c6cec3..673d5cae7c81 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -467,6 +467,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From d27a14060f8501e556a65b346b2644be0d0a2de8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:12 +0200 Subject: drm/panic: Move drm_panic_register prototype to drm_crtc_internal.h drm_panic_[un]register() are only used by the core drm, and are not intended to be called by other drm drivers, so move their prototypes to drm_crtc_internal.h. 
Suggested-by: Daniel Vetter Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-4-jfalempe@redhat.com --- include/drm/drm_panic.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 73bb3f3d9ed9..a4bd3681920d 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -146,16 +146,4 @@ struct drm_scanout_buffer { #define drm_panic_unlock(dev, flags) \ raw_spin_unlock_irqrestore(&(dev)->mode_config.panic_lock, flags) -#ifdef CONFIG_DRM_PANIC - -void drm_panic_register(struct drm_device *dev); -void drm_panic_unregister(struct drm_device *dev); - -#else - -static inline void drm_panic_register(struct drm_device *dev) {} -static inline void drm_panic_unregister(struct drm_device *dev) {} - -#endif - #endif /* __DRM_PANIC_H__ */ -- cgit v1.2.3 From 969135862e731620b9e03bb0c21179ff1cccfd0e Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:13 +0200 Subject: drm/panic: Move copyright notice to the top Move the copyright notice to the top of drm_panic.h, and add the missing Red Hat copyright notice. Suggested-by: Thomas Zimmermann Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-5-jfalempe@redhat.com --- include/drm/drm_panic.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index a4bd3681920d..54085d5d05c3 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -1,4 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 or MIT */ + +/* + * Copyright (c) 2024 Intel + * Copyright (c) 2024 Red Hat + */ + #ifndef __DRM_PANIC_H__ #define __DRM_PANIC_H__ @@ -8,9 +14,6 @@ #include #include -/* - * Copyright (c) 2024 Intel - */ /** * struct drm_scanout_buffer - DRM scanout buffer -- cgit v1.2.3 From 0dc4fb69eb14320ea0fcd9657b7748eec201ccaa Mon Sep 17 00:00:00 2001 From: Mohammed Anees Date: Sun, 11 Aug 2024 06:16:51 -0400 Subject: drm: Add missing documentation for struct drm_plane_size_hint This patch takes care of the following warnings during documentation compiling: ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'width' not described in 'drm_plane_size_hint' ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'height' not described in 'drm_plane_size_hint' Signed-off-by: Mohammed Anees Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240811101653.170223-1-pvmohammedanees2003@gmail.com --- include/uapi/drm/drm_mode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..c082810c08a8 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -859,6 +859,8 @@ struct drm_color_lut { /** * struct drm_plane_size_hint - Plane size hints + * @width: The width of the plane in pixel + * @height: The height of the plane in pixel * * The plane SIZE_HINTS property blob contains an * array of struct drm_plane_size_hint. 
-- cgit v1.2.3 From 8c38617722bdf57a90e6c77ed9ee5ebb60958d2a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 9 Aug 2024 16:15:54 +0200 Subject: memory: ti-aemif: remove platform data support There are no longer any users of the ti-aemif driver that set up platform data from board files. We can shrink the driver by removing support for it. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240809-ti-aemif-v1-1-27b1e5001390@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/ti-aemif.h | 45 ---------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/linux/platform_data/ti-aemif.h (limited to 'include') diff --git a/include/linux/platform_data/ti-aemif.h b/include/linux/platform_data/ti-aemif.h deleted file mode 100644 index 77625251df07..000000000000 --- a/include/linux/platform_data/ti-aemif.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TI DaVinci AEMIF platform glue. - * - * Copyright (C) 2017 BayLibre SAS - * - * Author: - * Bartosz Golaszewski - */ - -#ifndef __TI_DAVINCI_AEMIF_DATA_H__ -#define __TI_DAVINCI_AEMIF_DATA_H__ - -#include - -/** - * struct aemif_abus_data - Async bus configuration parameters. - * - * @cs - Chip-select number. - */ -struct aemif_abus_data { - u32 cs; -}; - -/** - * struct aemif_platform_data - Data to set up the TI aemif driver. - * - * @dev_lookup: of_dev_auxdata passed to of_platform_populate() for aemif - * subdevices. - * @cs_offset: Lowest allowed chip-select number. - * @abus_data: Array of async bus configuration entries. - * @num_abus_data: Number of abus entries. - * @sub_devices: Array of platform subdevices. - * @num_sub_devices: Number of subdevices. - */ -struct aemif_platform_data { - struct of_dev_auxdata *dev_lookup; - u32 cs_offset; - struct aemif_abus_data *abus_data; - size_t num_abus_data; - struct platform_device *sub_devices; - size_t num_sub_devices; -}; - -#endif /* __TI_DAVINCI_AEMIF_DATA_H__ */ -- cgit v1.2.3 From e9d05e9d5db155dcc708891611f1b6f977c3fa11 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 8 Aug 2024 22:40:54 +0200 Subject: media: uapi: rkisp1-config: Add extensible params format Add to the rkisp1-config.h header data types and documentation of the extensible parameters format. 
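To make the serialized buffer layout concrete, a minimal userspace sketch that packs a single Black Level Subtraction block into the new extensible parameters buffer, based on the structures added below; the black-level values are placeholders and error handling is omitted:

  #include <string.h>
  #include <linux/rkisp1-config.h>

  static void fill_ext_params(struct rkisp1_ext_params_cfg *cfg)
  {
          struct rkisp1_ext_params_bls_config bls = {
                  .header = {
                          .type  = RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS,
                          .flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE,
                          .size  = sizeof(bls),
                  },
                  .config = {
                          .enable_auto = 0,
                          /* Placeholder black levels. */
                          .fixed_val = { .r = 256, .gr = 256, .gb = 256, .b = 256 },
                  },
          };

          cfg->version = RKISP1_EXT_PARAM_BUFFER_V1;
          cfg->data_size = sizeof(bls);
          memcpy(cfg->data, &bls, sizeof(bls));
  }

Further blocks would be appended to cfg->data in the same way, with cfg->data_size grown accordingly.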
Signed-off-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Reviewed-by: Paul Elder Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/rkisp1-config.h | 491 +++++++++++++++++++++++++++++++++++++ 1 file changed, 491 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 6eeaf8bf2362..9767bb19c72d 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -996,4 +996,495 @@ struct rkisp1_stat_buffer { struct rkisp1_cif_isp_stat params; }; +/*---------- PART3: Extensible Configuration Parameters ------------*/ + +/** + * enum rkisp1_ext_params_block_type - RkISP1 extensible params block type + * + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS: Black level subtraction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC: Defect pixel cluster correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG: Sensor de-gamma + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN: Auto white balance gains + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT: ISP filtering + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM: Bayer de-mosaic + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK: Cross-talk correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC: Gamma out correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF: De-noise pre-filter + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH: De-noise pre-filter strength + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC: Color processing + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_IE: Image effects + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC: Lens shading correction + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS: Auto white balance statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS: Histogram statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS: Auto exposure statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS: Auto-focus statistics + */ +enum rkisp1_ext_params_block_type { + RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN, + RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT, + RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM, + RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK, + RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF, + RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH, + RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_IE, + RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS, +}; + +#define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) +#define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE (1U << 1) + +/** + * struct rkisp1_ext_params_block_header - RkISP1 extensible parameters block + * header + * + * This structure represents the common part of all the ISP configuration + * blocks. Each parameters block shall embed an instance of this structure type + * as its first member, followed by the block-specific configuration data. The + * driver inspects this common header to discern the block type and its size and + * properly handle the block content by casting it to the correct block-specific + * type. + * + * The @type field is one of the values enumerated by + * :c:type:`rkisp1_ext_params_block_type` and specifies how the data should be + * interpreted by the driver. The @size field specifies the size of the + * parameters block and is used by the driver for validation purposes. + * + * The @flags field is a bitmask of per-block flags RKISP1_EXT_PARAMS_FL_*. 
+ * + * When userspace wants to configure and enable an ISP block it shall fully + * populate the block configuration and set the + * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE bit in the @flags field. + * + * When userspace simply wants to disable an ISP block the + * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bit should be set in @flags field. The + * driver ignores the rest of the block configuration structure in this case. + * + * If a new configuration of an ISP block has to be applied userspace shall + * fully populate the ISP block configuration and omit setting the + * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits + * in the @flags field. + * + * Setting both the RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and + * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits in the @flags field is not allowed + * and not accepted by the driver. + * + * Userspace is responsible for correctly populating the parameters block header + * fields (@type, @flags and @size) and the block-specific parameters. + * + * For example: + * + * .. code-block:: c + * + * void populate_bls(struct rkisp1_ext_params_block_header *block) { + * struct rkisp1_ext_params_bls_config *bls = + * (struct rkisp1_ext_params_bls_config *)block; + * + * bls->header.type = RKISP1_EXT_PARAMS_BLOCK_ID_BLS; + * bls->header.flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; + * bls->header.size = sizeof(*bls); + * + * bls->config.enable_auto = 0; + * bls->config.fixed_val.r = blackLevelRed_; + * bls->config.fixed_val.gr = blackLevelGreenR_; + * bls->config.fixed_val.gb = blackLevelGreenB_; + * bls->config.fixed_val.b = blackLevelBlue_; + * } + * + * @type: The parameters block type, see + * :c:type:`rkisp1_ext_params_block_type` + * @flags: A bitmask of block flags + * @size: Size (in bytes) of the parameters block, including this header + */ +struct rkisp1_ext_params_block_header { + __u16 type; + __u16 flags; + __u32 size; +}; + +/** + * struct rkisp1_ext_params_bls_config - RkISP1 extensible params BLS config + * + * RkISP1 extensible parameters Black Level Subtraction configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Black Level Subtraction configuration, see + * :c:type:`rkisp1_cif_isp_bls_config` + */ +struct rkisp1_ext_params_bls_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_bls_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpcc_config - RkISP1 extensible params DPCC config + * + * RkISP1 extensible parameters Defective Pixel Cluster Correction configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Defective Pixel Cluster Correction configuration, see + * :c:type:`rkisp1_cif_isp_dpcc_config` + */ +struct rkisp1_ext_params_dpcc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpcc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_sdg_config - RkISP1 extensible params SDG config + * + * RkISP1 extensible parameters Sensor Degamma configuration block. Identified + * by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Sensor Degamma configuration, see + * :c:type:`rkisp1_cif_isp_sdg_config` + */ +struct rkisp1_ext_params_sdg_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_sdg_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_lsc_config - RkISP1 extensible params LSC config + * + * RkISP1 extensible parameters Lens Shading Correction configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Lens Shading Correction configuration, see + * :c:type:`rkisp1_cif_isp_lsc_config` + */ +struct rkisp1_ext_params_lsc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_lsc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_awb_gain_config - RkISP1 extensible params AWB + * gain config + * + * RkISP1 extensible parameters Auto-White Balance Gains configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-White Balance Gains configuration, see + * :c:type:`rkisp1_cif_isp_awb_gain_config` + */ +struct rkisp1_ext_params_awb_gain_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_awb_gain_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_flt_config - RkISP1 extensible params FLT config + * + * RkISP1 extensible parameters Filter configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Filter configuration, see :c:type:`rkisp1_cif_isp_flt_config` + */ +struct rkisp1_ext_params_flt_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_flt_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_bdm_config - RkISP1 extensible params BDM config + * + * RkISP1 extensible parameters Demosaicing configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Demosaicing configuration, see :c:type:`rkisp1_cif_isp_bdm_config` + */ +struct rkisp1_ext_params_bdm_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_bdm_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_ctk_config - RkISP1 extensible params CTK config + * + * RkISP1 extensible parameters Cross-Talk configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Cross-Talk configuration, see :c:type:`rkisp1_cif_isp_ctk_config` + */ +struct rkisp1_ext_params_ctk_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_ctk_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_goc_config - RkISP1 extensible params GOC config + * + * RkISP1 extensible parameters Gamma-Out configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Gamma-Out configuration, see :c:type:`rkisp1_cif_isp_goc_config` + */ +struct rkisp1_ext_params_goc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_goc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpf_config - RkISP1 extensible params DPF config + * + * RkISP1 extensible parameters De-noise Pre-Filter configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: De-noise Pre-Filter configuration, see + * :c:type:`rkisp1_cif_isp_dpf_config` + */ +struct rkisp1_ext_params_dpf_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpf_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_dpf_strength_config - RkISP1 extensible params DPF + * strength config + * + * RkISP1 extensible parameters De-noise Pre-Filter strength configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: De-noise Pre-Filter strength configuration, see + * :c:type:`rkisp1_cif_isp_dpf_strength_config` + */ +struct rkisp1_ext_params_dpf_strength_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_dpf_strength_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_cproc_config - RkISP1 extensible params CPROC config + * + * RkISP1 extensible parameters Color Processing configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Color processing configuration, see + * :c:type:`rkisp1_cif_isp_cproc_config` + */ +struct rkisp1_ext_params_cproc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_cproc_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_ie_config - RkISP1 extensible params IE config + * + * RkISP1 extensible parameters Image Effect configuration block. Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_IE`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Image Effect configuration, see :c:type:`rkisp1_cif_isp_ie_config` + */ +struct rkisp1_ext_params_ie_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_ie_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_awb_meas_config - RkISP1 extensible params AWB + * Meas config + * + * RkISP1 extensible parameters Auto-White Balance Measurement configuration + * block. Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-White Balance measure configuration, see + * :c:type:`rkisp1_cif_isp_awb_meas_config` + */ +struct rkisp1_ext_params_awb_meas_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_awb_meas_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_hst_config - RkISP1 extensible params Histogram config + * + * RkISP1 extensible parameters Histogram statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Histogram statistics configuration, see + * :c:type:`rkisp1_cif_isp_hst_config` + */ +struct rkisp1_ext_params_hst_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_hst_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_aec_config - RkISP1 extensible params AEC config + * + * RkISP1 extensible parameters Auto-Exposure statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-Exposure statistics configuration, see + * :c:type:`rkisp1_cif_isp_aec_config` + */ +struct rkisp1_ext_params_aec_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_aec_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_afc_config - RkISP1 extensible params AFC config + * + * RkISP1 extensible parameters Auto-Focus statistics configuration block. + * Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS`. 
+ * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Auto-Focus statistics configuration, see + * :c:type:`rkisp1_cif_isp_afc_config` + */ +struct rkisp1_ext_params_afc_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_afc_config config; +} __attribute__((aligned(8))); + +#define RKISP1_EXT_PARAMS_MAX_SIZE \ + (sizeof(struct rkisp1_ext_params_bls_config) +\ + sizeof(struct rkisp1_ext_params_dpcc_config) +\ + sizeof(struct rkisp1_ext_params_sdg_config) +\ + sizeof(struct rkisp1_ext_params_lsc_config) +\ + sizeof(struct rkisp1_ext_params_awb_gain_config) +\ + sizeof(struct rkisp1_ext_params_flt_config) +\ + sizeof(struct rkisp1_ext_params_bdm_config) +\ + sizeof(struct rkisp1_ext_params_ctk_config) +\ + sizeof(struct rkisp1_ext_params_goc_config) +\ + sizeof(struct rkisp1_ext_params_dpf_config) +\ + sizeof(struct rkisp1_ext_params_dpf_strength_config) +\ + sizeof(struct rkisp1_ext_params_cproc_config) +\ + sizeof(struct rkisp1_ext_params_ie_config) +\ + sizeof(struct rkisp1_ext_params_awb_meas_config) +\ + sizeof(struct rkisp1_ext_params_hst_config) +\ + sizeof(struct rkisp1_ext_params_aec_config) +\ + sizeof(struct rkisp1_ext_params_afc_config)) + +/** + * enum rksip1_ext_param_buffer_version - RkISP1 extensible parameters version + * + * @RKISP1_EXT_PARAM_BUFFER_V1: First version of RkISP1 extensible parameters + */ +enum rksip1_ext_param_buffer_version { + RKISP1_EXT_PARAM_BUFFER_V1 = 1, +}; + +/** + * struct rkisp1_ext_params_cfg - RkISP1 extensible parameters configuration + * + * This struct contains the configuration parameters of the RkISP1 ISP + * algorithms, serialized by userspace into a data buffer. Each configuration + * parameter block is represented by a block-specific structure which contains a + * :c:type:`rkisp1_ext_params_block_header` entry as first member. Userspace + * populates the @data buffer with configuration parameters for the blocks that + * it intends to configure. As a consequence, the data buffer effective size + * changes according to the number of ISP blocks that userspace intends to + * configure and is set by userspace in the @data_size field. + * + * The parameters buffer is versioned by the @version field to allow modifying + * and extending its definition. Userspace shall populate the @version field to + * inform the driver about the version it intends to use. The driver will parse + * and handle the @data buffer according to the data layout specific to the + * indicated version and return an error if the desired version is not + * supported. + * + * Currently the single RKISP1_EXT_PARAM_BUFFER_V1 version is supported. + * When a new format version will be added, a mechanism for userspace to query + * the supported format versions will be implemented in the form of a read-only + * V4L2 control. If such control is not available, userspace should assume only + * RKISP1_EXT_PARAM_BUFFER_V1 is supported by the driver. + * + * For each ISP block that userspace wants to configure, a block-specific + * structure is appended to the @data buffer, one after the other without gaps + * in between nor overlaps. Userspace shall populate the @data_size field with + * the effective size, in bytes, of the @data buffer. 
+ * + * The expected memory layout of the parameters buffer is:: + * + * +-------------------- struct rkisp1_ext_params_cfg -------------------+ + * | version = RKISP_EXT_PARAMS_BUFFER_V1; | + * | data_size = sizeof(struct rkisp1_ext_params_bls_config) | + * | + sizeof(struct rkisp1_ext_params_dpcc_config); | + * | +------------------------- data ---------------------------------+ | + * | | +------------- struct rkisp1_ext_params_bls_config -----------+ | | + * | | | +-------- struct rkisp1_ext_params_block_header ---------+ | | | + * | | | | type = RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS; | | | | + * | | | | flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; | | | | + * | | | | size = sizeof(struct rkisp1_ext_params_bls_config); | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | +---------- struct rkisp1_cif_isp_bls_config -------------+ | | | + * | | | | enable_auto = 0; | | | | + * | | | | fixed_val.r = 256; | | | | + * | | | | fixed_val.gr = 256; | | | | + * | | | | fixed_val.gb = 256; | | | | + * | | | | fixed_val.b = 256; | | | | + * | | | +---------------------------------------------------------+ | | | + * | | +------------ struct rkisp1_ext_params_dpcc_config -----------+ | | + * | | | +-------- struct rkisp1_ext_params_block_header ---------+ | | | + * | | | | type = RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC; | | | | + * | | | | flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; | | | | + * | | | | size = sizeof(struct rkisp1_ext_params_dpcc_config); | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | +---------- struct rkisp1_cif_isp_dpcc_config ------------+ | | | + * | | | | mode = RKISP1_CIF_ISP_DPCC_MODE_STAGE1_ENABLE; | | | | + * | | | | output_mode = | | | | + * | | | | RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_INCL_G_CENTER; | | | | + * | | | | set_use = ... ; | | | | + * | | | | ... = ... ; | | | | + * | | | +---------------------------------------------------------+ | | | + * | | +-------------------------------------------------------------+ | | + * | +-----------------------------------------------------------------+ | + * +---------------------------------------------------------------------+ + * + * @version: The RkISP1 extensible parameters buffer version, see + * :c:type:`rksip1_ext_param_buffer_version` + * @data_size: The RkISP1 configuration data effective size, excluding this + * header + * @data: The RkISP1 extensible configuration data blocks + */ +struct rkisp1_ext_params_cfg { + __u32 version; + __u32 data_size; + __u8 data[RKISP1_EXT_PARAMS_MAX_SIZE]; +}; + #endif /* _UAPI_RKISP1_CONFIG_H */ -- cgit v1.2.3 From 1fc379f6241b331207c4573c4ff43526fe404301 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 8 Aug 2024 22:40:55 +0200 Subject: media: uapi: videodev2: Add V4L2_META_FMT_RK_ISP1_EXT_PARAMS The rkisp1 driver stores ISP configuration parameters in the fixed rkisp1_params_cfg structure. As the members of the structure are part of the userspace API, the structure layout is immutable and cannot be extended further. Introducing new parameters or modifying the existing ones would change the buffer layout and cause breakages in existing applications. The allow for future extensions to the ISP parameters, introduce a new extensible parameters format, with a new format 4CC. Document usage of the new format in the rkisp1 admin guide. 
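As a hedged usage sketch for the new format: selecting it on the rkisp1 parameters video node with VIDIOC_S_FMT; the device node path passed by the caller is an assumption and error handling is kept minimal:

  #include <fcntl.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/videodev2.h>

  static int set_ext_params_format(const char *devnode)
  {
          struct v4l2_format fmt;
          int fd, ret;

          fd = open(devnode, O_RDWR);  /* e.g. the rkisp1 params node */
          if (fd < 0)
                  return -1;

          memset(&fmt, 0, sizeof(fmt));
          fmt.type = V4L2_BUF_TYPE_META_OUTPUT;
          fmt.fmt.meta.dataformat = V4L2_META_FMT_RK_ISP1_EXT_PARAMS;
          /* fmt.fmt.meta.buffersize is filled in by the driver on return. */

          ret = ioctl(fd, VIDIOC_S_FMT, &fmt);
          close(fd);
          return ret;
  }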
Signed-off-by: Jacopo Mondi Reviewed-by: Daniel Scally Reviewed-by: Paul Elder Reviewed-by: Laurent Pinchart Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 4e91362da6da..725e86c4bbbd 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -854,6 +854,7 @@ struct v4l2_pix_format { /* Vendor specific - used for RK_ISP1 camera sub-system */ #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */ #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */ +#define V4L2_META_FMT_RK_ISP1_EXT_PARAMS v4l2_fourcc('R', 'K', '1', 'E') /* Rockchip ISP1 3a Extensible Parameters */ /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ -- cgit v1.2.3 From ce056504e2e53b22c1f789cff2ad6a0a135dc24d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:20 -0700 Subject: ethtool: make ethtool_ops::cap_rss_ctx_supported optional cap_rss_ctx_supported was created because the API for creating and configuring additional contexts is mux'ed with the normal RSS API. Presence of ops does not imply driver can actually support rss_context != 0 (in fact drivers mostly ignore that field). cap_rss_ctx_supported lets core check that the driver is context-aware before calling it. Now that we have .create_rxfh_context, there is no such ambiguity. We can depend on presence of the op. Make setting the bit optional. Reviewed-by: Gal Pressman Reviewed-by: Edward Cree Reviewed-by: Joe Damato Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 989c94eddb2b..a149485904d8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -727,7 +727,8 @@ struct kernel_ethtool_ts_info { * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts. + * contexts via legacy API, drivers implementing @create_rxfh_context + * do not have to set this bit. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table -- cgit v1.2.3 From ec6e57beaf8bc64ea0c2dc0cc360afcc7f504425 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:22 -0700 Subject: ethtool: rss: don't report key if device doesn't support it marvell/otx2 and mvpp2 do not support setting different keys for different RSS contexts. Contexts have separate indirection tables but key is shared with all other contexts. This is likely fine, indirection table is the most important piece. Don't report the key-related parameters from such drivers. This prevents driver-errors, e.g. otx2 always writes the main key, even when user asks to change per-context key. The second reason is that without this change tracking the keys by the core gets complicated. Even if the driver correctly reject setting key with rss_context != 0, change of the main key would have to be reflected in the XArray for all additional contexts. 
Since the additional contexts don't have their own keys not including the attributes (in Netlink speak) seems intuitive. ethtool CLI seems to deal with it just fine. Having to set the flag in majority of the drivers is a bit tedious but not reporting the key is a safer default. Reviewed-by: Edward Cree Reviewed-by: Joe Damato Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a149485904d8..12f6dc567598 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -731,6 +731,9 @@ struct kernel_ethtool_ts_info { * do not have to set this bit. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_per_ctx_key: device supports setting different RSS key for each + * additional context. Netlink API should report hfunc, key, and input_xfrm + * for every context, not just context 0. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table * size as returned by @get_rxfh_indir_size may change during lifetime * of the device. Leave as 0 if the table size is constant. @@ -952,6 +955,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_per_ctx_key:1; u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; -- cgit v1.2.3 From 3d50c66c0609c8b64fb22e9c188fca88f34e7c98 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2024 22:37:26 -0700 Subject: ethtool: rss: support skipping contexts during dump Applications may want to deal with dynamic RSS contexts only. So dumping context 0 will be counter-productive for them. Support starting the dump from a given context ID. Alternative would be to implement a dump flag to skip just context 0, not sure which is better... Reviewed-by: Edward Cree Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 6d5bdcc67631..93c57525a975 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -965,6 +965,7 @@ enum { ETHTOOL_A_RSS_INDIR, /* binary */ ETHTOOL_A_RSS_HKEY, /* binary */ ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ + ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -- cgit v1.2.3 From fc9aef4382c02774662da3d7e1de8ba224e04f80 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 25 Jul 2024 08:23:40 -0400 Subject: platform/x86/intel/vsec.h: Move to include/linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers outside of PDX86 need access to the vsec header. Move it to include/linux to make it easier to include. Reviewed-by: Ilpo Järvinen Reviewed-by: Michael J. Ruhl Signed-off-by: David E. 
Box Link: https://lore.kernel.org/r/20240725122346.4063913-2-michael.j.ruhl@intel.com Signed-off-by: Hans de Goede --- include/linux/intel_vsec.h | 134 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 include/linux/intel_vsec.h (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h new file mode 100644 index 000000000000..6495e37c9079 --- /dev/null +++ b/include/linux/intel_vsec.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_VSEC_H +#define _INTEL_VSEC_H + +#include +#include + +#define VSEC_CAP_TELEMETRY BIT(0) +#define VSEC_CAP_WATCHER BIT(1) +#define VSEC_CAP_CRASHLOG BIT(2) +#define VSEC_CAP_SDSI BIT(3) +#define VSEC_CAP_TPMI BIT(4) + +/* Intel DVSEC offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define TABLE_OFFSET_SHIFT 3 + +struct pci_dev; +struct resource; + +enum intel_vsec_id { + VSEC_ID_TELEMETRY = 2, + VSEC_ID_WATCHER = 3, + VSEC_ID_CRASHLOG = 4, + VSEC_ID_SDSI = 65, + VSEC_ID_TPMI = 66, +}; + +/** + * struct intel_vsec_header - Common fields of Intel VSEC and DVSEC registers. + * @rev: Revision ID of the VSEC/DVSEC register space + * @length: Length of the VSEC/DVSEC register space + * @id: ID of the feature + * @num_entries: Number of instances of the feature + * @entry_size: Size of the discovery table for each feature + * @tbir: BAR containing the discovery tables + * @offset: BAR offset of start of the first discovery table + */ +struct intel_vsec_header { + u8 rev; + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum intel_vsec_quirks { + /* Watcher feature not supported */ + VSEC_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog feature not supported */ + VSEC_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + VSEC_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + VSEC_QUIRK_NO_DVSEC = BIT(3), + + /* Platforms requiring quirk in the auxiliary driver */ + VSEC_QUIRK_EARLY_HW = BIT(4), +}; + +/** + * struct intel_vsec_platform_info - Platform specific data + * @parent: parent device in the auxbus chain + * @headers: list of headers to define the PMT client devices to create + * @caps: bitmask of PMT capabilities for the given headers + * @quirks: bitmask of VSEC device quirks + * @base_addr: allow a base address to be specified (rather than derived) + */ +struct intel_vsec_platform_info { + struct device *parent; + struct intel_vsec_header **headers; + unsigned long caps; + unsigned long quirks; + u64 base_addr; +}; + +/** + * struct intel_sec_device - Auxbus specific device information + * @auxdev: auxbus device struct for auxbus access + * @pcidev: pci device associated with the device + * @resource: any resources shared by the parent + * @ida: id reference + * @num_resources: number of resources + * @id: xarray id + * @priv_data: any private data needed + * @quirks: specified quirks + * @base_addr: base address of entries (if specified) + */ +struct intel_vsec_device { + struct auxiliary_device auxdev; + struct pci_dev *pcidev; + struct resource *resource; + struct ida *ida; + int num_resources; + int id; /* xa */ + void *priv_data; + size_t priv_data_size; + unsigned long quirks; + u64 base_addr; +}; + +int intel_vsec_add_aux(struct pci_dev *pdev, struct device 
*parent, + struct intel_vsec_device *intel_vsec_dev, + const char *name); + +static inline struct intel_vsec_device *dev_to_ivdev(struct device *dev) +{ + return container_of(dev, struct intel_vsec_device, auxdev.dev); +} + +static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device *auxdev) +{ + return container_of(auxdev, struct intel_vsec_device, auxdev); +} + +#if IS_ENABLED(CONFIG_INTEL_VSEC) +void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info); +#else +static inline void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info) +{ +} +#endif +#endif -- cgit v1.2.3 From e92affc74cd8624a548b380af7364be037adef35 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 25 Jul 2024 08:23:41 -0400 Subject: platform/x86/intel/vsec: Add PMT read callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some PMT providers require device specific actions before their telemetry can be read. Provide assignable PMT read callbacks to allow providers to perform those actions. Reviewed-by: Ilpo Järvinen Reviewed-by: Michael J. Ruhl Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20240725122346.4063913-3-michael.j.ruhl@intel.com Signed-off-by: Hans de Goede --- include/linux/intel_vsec.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 6495e37c9079..11ee185566c3 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -67,10 +67,24 @@ enum intel_vsec_quirks { VSEC_QUIRK_EARLY_HW = BIT(4), }; +/** + * struct pmt_callbacks - Callback infrastructure for PMT devices + * ->read_telem() when specified, called by client driver to access PMT data (instead + * of direct copy). + * @pdev: PCI device reference for the callback's use + * @guid: ID of data to acccss + * @data: buffer for the data to be copied + * @count: size of buffer + */ +struct pmt_callbacks { + int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, u32 count); +}; + /** * struct intel_vsec_platform_info - Platform specific data * @parent: parent device in the auxbus chain * @headers: list of headers to define the PMT client devices to create + * @priv_data: private data, usable by parent devices, currently a callback * @caps: bitmask of PMT capabilities for the given headers * @quirks: bitmask of VSEC device quirks * @base_addr: allow a base address to be specified (rather than derived) @@ -78,6 +92,7 @@ enum intel_vsec_quirks { struct intel_vsec_platform_info { struct device *parent; struct intel_vsec_header **headers; + void *priv_data; unsigned long caps; unsigned long quirks; u64 base_addr; -- cgit v1.2.3 From 61b74964536e86445d43acff5cff6ad907ba9321 Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 1 Aug 2024 05:18:14 +0000 Subject: trace: platform/x86/intel/ifs: Add SBAF trace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracing support for the SBAF IFS tests, which may be useful for debugging systems that fail these tests. Log details like test content batch number, SBAF bundle ID, program index and the exact errors or warnings encountered by each HT thread during the test. 
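For orientation, a hedged sketch of the driver-side call provided by the generated tracepoint; the surrounding helper and the way the activation/status unions are obtained are assumptions, only trace_ifs_sbaf() itself follows from the TRACE_EVENT below:

  #include <trace/events/intel_ifs.h>

  /* union ifs_sbaf / ifs_sbaf_status are assumed to come from the driver's
   * private header. */
  static void ex_report_sbaf_result(int batch, union ifs_sbaf activate,
                                    union ifs_sbaf_status status)
  {
          /* Logs batch, bundle_idx, pgm_idx and status per the TP_printk format. */
          trace_ifs_sbaf(batch, activate, status);
  }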
Reviewed-by: Ashok Raj Reviewed-by: Tony Luck Reviewed-by: Ilpo Järvinen Reviewed-by: Steven Rostedt (Google) Signed-off-by: Jithu Joseph Signed-off-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20240801051814.1935149-5-sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Hans de Goede --- include/trace/events/intel_ifs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h index 0d88ebf2c980..70323acde1de 100644 --- a/include/trace/events/intel_ifs.h +++ b/include/trace/events/intel_ifs.h @@ -35,6 +35,33 @@ TRACE_EVENT(ifs_status, __entry->status) ); +TRACE_EVENT(ifs_sbaf, + + TP_PROTO(int batch, union ifs_sbaf activate, union ifs_sbaf_status status), + + TP_ARGS(batch, activate, status), + + TP_STRUCT__entry( + __field( u64, status ) + __field( int, batch ) + __field( u16, bundle ) + __field( u16, pgm ) + ), + + TP_fast_assign( + __entry->status = status.data; + __entry->batch = batch; + __entry->bundle = activate.bundle_idx; + __entry->pgm = activate.pgm_idx; + ), + + TP_printk("batch: 0x%.2x, bundle_idx: 0x%.4x, pgm_idx: 0x%.4x, status: 0x%.16llx", + __entry->batch, + __entry->bundle, + __entry->pgm, + __entry->status) +); + #endif /* _TRACE_IFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 29bc83e4d90546aa794a9584786086b141a6ba4d Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Mon, 15 Jul 2024 16:23:24 -0700 Subject: srcu: faster gp seq wrap-around Using a higher value for the initial gp sequence counters allows for wrapping to occur faster. It can help with surfacing any issues that may be happening as a result of the wrap around. Signed-off-by: JP Kobryn Tested-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 3 +++ include/linux/srcutree.h | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..8d56db70d417 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,6 +34,9 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define RCU_SEQ_CTR_SHIFT 2 +#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) + /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8f3f72480e78..ed57598394de 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -129,10 +129,23 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 +/* + * Values for initializing gp sequence fields. Higher values allow wrap arounds to + * occur earlier. + * The second value with state is useful in the case of static initialization of + * srcu_usage where srcu_gp_seq_needed is expected to have some state value in its + * lower bits (or else it will appear to be already initialized within + * the call check_init_srcu_struct()). 
+ */ +#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT) +#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1) + #define __SRCU_USAGE_INIT(name) \ { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .srcu_gp_seq_needed = -1UL, \ + .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ + .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ + .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ } -- cgit v1.2.3 From 711f5c5ce6c2c640c1b3b569ab2a8847be5ab21f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 15 Jul 2024 21:22:51 -0400 Subject: lsm: cleanup lsm_hooks.h Some cleanup and style corrections for lsm_hooks.h. * Drop the lsm_inode_alloc() extern declaration, it is not needed. * Relocate lsm_get_xattr_slot() and extern variables in the file to improve grouping of related objects. * Don't use tabs to needlessly align structure fields. Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 87 +++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index f1ca8082075a..11ea0063228f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -51,8 +51,8 @@ struct security_hook_heads { * Contains the information that identifies the LSM. */ struct lsm_id { - const char *name; - u64 id; + const char *name; + u64 id; }; /* @@ -60,49 +60,31 @@ struct lsm_id { * For use with generic list macros for common operations. */ struct security_hook_list { - struct hlist_node list; - struct hlist_head *head; - union security_list_options hook; - const struct lsm_id *lsmid; + struct hlist_node list; + struct hlist_head *head; + union security_list_options hook; + const struct lsm_id *lsmid; } __randomize_layout; /* * Security blob size or offset data. */ struct lsm_blob_sizes { - int lbs_cred; - int lbs_file; - int lbs_ib; - int lbs_inode; - int lbs_sock; - int lbs_superblock; - int lbs_ipc; - int lbs_key; - int lbs_msg_msg; - int lbs_perf_event; - int lbs_task; - int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ - int lbs_tun_dev; + int lbs_cred; + int lbs_file; + int lbs_ib; + int lbs_inode; + int lbs_sock; + int lbs_superblock; + int lbs_ipc; + int lbs_key; + int lbs_msg_msg; + int lbs_perf_event; + int lbs_task; + int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_tun_dev; }; -/** - * lsm_get_xattr_slot - Return the next available slot and increment the index - * @xattrs: array storing LSM-provided xattrs - * @xattr_count: number of already stored xattrs (updated) - * - * Retrieve the first available slot in the @xattrs array to fill with an xattr, - * and increment @xattr_count. - * - * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. - */ -static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, - int *xattr_count) -{ - if (unlikely(!xattrs)) - return NULL; - return &xattrs[(*xattr_count)++]; -} - /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). 
@@ -118,9 +100,6 @@ static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, #define LSM_HOOK_INIT(HEAD, HOOK) \ { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } -extern struct security_hook_heads security_hook_heads; -extern char *lsm_names; - extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); @@ -142,9 +121,6 @@ struct lsm_info { struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; -extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; -extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; - #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ __used __section(".lsm_info.init") \ @@ -155,6 +131,29 @@ extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) -extern int lsm_inode_alloc(struct inode *inode); +/* DO NOT tamper with these variables outside of the LSM framework */ +extern char *lsm_names; +extern struct security_hook_heads security_hook_heads; +extern struct lsm_static_calls_table static_calls_table __ro_after_init; +extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; +extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; + +/** + * lsm_get_xattr_slot - Return the next available slot and increment the index + * @xattrs: array storing LSM-provided xattrs + * @xattr_count: number of already stored xattrs (updated) + * + * Retrieve the first available slot in the @xattrs array to fill with an xattr, + * and increment @xattr_count. + * + * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. + */ +static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, + int *xattr_count) +{ + if (unlikely(!xattrs)) + return NULL; + return &xattrs[(*xattr_count)++]; +} #endif /* ! __LINUX_LSM_HOOKS_H */ -- cgit v1.2.3 From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 9 Jul 2024 19:43:06 -0400 Subject: lsm: add the inode_free_security_rcu() LSM implementation hook The LSM framework has an existing inode_free_security() hook which is used by LSMs that manage state associated with an inode, but due to the use of RCU to protect the inode, special care must be taken to ensure that the LSMs do not fully release the inode state until it is safe from a RCU perspective. This patch implements a new inode_free_security_rcu() implementation hook which is called when it is safe to free the LSM's internal inode state. Unfortunately, this new hook does not have access to the inode itself as it may already be released, so the existing inode_free_security() hook is retained for those LSMs which require access to the inode. 
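A hedged sketch of how an LSM might split inode teardown across the existing hook and the new RCU-deferred one; the "ex_"/"example_" names and the ex_inode_sec structure are illustrative, not taken from any in-tree LSM:

  #include <linux/fs.h>
  #include <linux/list.h>
  #include <linux/lsm_hooks.h>
  #include <linux/slab.h>

  struct ex_inode_sec {
          struct list_head list;
          char *label;
  };

  static struct lsm_blob_sizes ex_blob_sizes __ro_after_init = {
          .lbs_inode = sizeof(struct ex_inode_sec),
  };

  static struct ex_inode_sec *ex_inode(const struct inode *inode)
  {
          return inode->i_security + ex_blob_sizes.lbs_inode;
  }

  /* The inode is still valid here; do the work that needs it. */
  static void example_inode_free_security(struct inode *inode)
  {
          list_del_init(&ex_inode(inode)->list);
  }

  /* Called once an RCU grace period has passed; only the blob is passed in. */
  static void example_inode_free_security_rcu(void *inode_security)
  {
          struct ex_inode_sec *isec = inode_security;

          kfree(isec->label);
  }

  static struct security_hook_list example_hooks[] __ro_after_init = {
          LSM_HOOK_INIT(inode_free_security, example_inode_free_security),
          LSM_HOOK_INIT(inode_free_security_rcu, example_inode_free_security_rcu),
  };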
Cc: stable@vger.kernel.org Reported-by: syzbot+5446fbf332b0602ede0b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 63e2656d1d56..520730fe2d94 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) -- cgit v1.2.3 From 42b0f8da3acc87953161baeb24f756936eb4d4b2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 31 Jul 2024 07:47:27 +0200 Subject: nsfs: fix ioctl declaration The kernel is writing an object of type __u64, so the ioctl has to be defined to _IOR(NSIO, 0x5, __u64) instead of _IO(NSIO, 0x5). Reported-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20240730164554.GA18486@altlinux.org Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6..5fad3d0fcd70 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ -- cgit v1.2.3 From 86509e38a80da34d7800985fa2be183475242c8c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 9 Aug 2024 15:50:35 +0200 Subject: file: fix typo in take_fd() comment The explanatory comment above take_fd() contains a typo, fix that to not confuse readers. Signed-off-by: Mathias Krause Link: https://lore.kernel.org/r/20240809135035.748109-1-minipli@grsecurity.net Signed-off-by: Christian Brauner --- include/linux/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..59b146a14dca 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); -- cgit v1.2.3 From 8e5ced7804cb9184c4a23f8054551240562a8eda Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2024 17:01:40 +0100 Subject: netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" This reverts commit ae678317b95e760607c7b20b97c9cd4ca9ed6e1a. 
Revert the patch that removes the deprecated use of PG_private_2 in netfslib for the moment as Ceph is actually still using this to track data copied to the cache. Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268df..24ec3434d32e 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -145,6 +145,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ -- cgit v1.2.3 From 7b589a9b45ae32aa9d7bece597490e141198d7a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Aug 2024 19:38:46 +0100 Subject: netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used correctly. The problem is that we try to set them up in the request initialisation, but the cache may still be in the process of being set up, and so the state may not be correct. Further, we secondarily sample the cache state and make contradictory decisions later. The issue arises because we set up the cache resources, which allows the cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which triggers cache writing even if we didn't set the flags when allocating. Fix this in the following way: (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in ->init_request() rather than trying to juggle that in netfs_alloc_request(). (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is to be done, then PG_private_2 is to be used rather than only setting it if we decide to cache and then having netfs_rreq_unlock_folios() set the non-PG_private_2 writeback-to-cache if it wasn't set. (3) Split netfs_rreq_unlock_folios() into two functions, one of which contains the deprecated code for using PG_private_2 to avoid accidentally doing the writeback path - and always use it if USE_PGPRIV2 is set. (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always wait for PG_private_2. This function is deprecated and only used by ceph anyway, and so label it so. (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use fscache_operation_valid() on the cache_resources instead. This has the advantage of picking up the result of netfs_begin_cache_read() and fscache_begin_write_operation() - which are called after the object is initialised and will wait for the cache to come to a usable state. Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be applied on top of that. Without this as well, things like: rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { and: WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386 may happen, along with some UAFs due to PG_private_2 not getting used to wait on writeback completion.
Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty") Reported-by: Max Kellermann Signed-off-by: David Howells cc: Ilya Dryomov cc: Xiubo Li cc: Hristo Venev cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/ [1] Link: https://lore.kernel.org/r/1173209.1723152682@warthog.procyon.org.uk Signed-off-by: Christian Brauner --- include/linux/netfs.h | 3 --- include/trace/events/netfs.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2..983816608f15 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 24ec3434d32e..606b4a0f92da 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ -- cgit v1.2.3 From fdad456cbcca739bae1849549c7a999857c56f88 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 28 Jul 2024 19:46:11 +0800 Subject: bpf: Fix updating attached freplace prog in prog_array map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") fixed a NULL pointer dereference panic, but didn't fix the issue that fails to update attached freplace prog to prog_array map. Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"), freplace prog and its target prog are able to tail call each other. And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach") sets prog->aux->dst_prog as NULL after attaching freplace prog to its target prog. After loading freplace the prog_array's owner type is BPF_PROG_TYPE_SCHED_CLS. Then, after attaching freplace its prog->aux->dst_prog is NULL. Then, while updating freplace in prog_array the bpf_prog_map_compatible() incorrectly returns false because resolve_prog_type() returns BPF_PROG_TYPE_EXT instead of BPF_PROG_TYPE_SCHED_CLS. After this patch the resolve_prog_type() returns BPF_PROG_TYPE_SCHED_CLS and update to prog_array can succeed. 
Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") Cc: Toke Høiland-Jørgensen Cc: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a3..7b776dae36e5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,8 +856,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) -- cgit v1.2.3 From 246ef40670b71fef0c3e2cd11404279bc6d6468e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 29 Jul 2024 17:30:10 -0700 Subject: ipv6: eliminate ndisc_ops_is_useropt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit as it doesn't seem to offer anything of value. There's only 1 trivial user: int lowpan_ndisc_is_useropt(u8 nd_opt_type) { return nd_opt_type == ND_OPT_6CO; } but there's no harm to always treating that as a useropt... Cc: David Ahern Cc: YOSHIFUJI Hideaki / 吉藤英明 Signed-off-by: Maciej Żenczykowski Link: https://patch.msgid.link/20240730003010.156977-1-maze@google.com Signed-off-by: Jakub Kicinski --- include/net/ndisc.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 7a533d5b1d59..3c88d5bc5eed 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -147,11 +147,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * - * int (*is_useropt)(u8 nd_opt_type): - * This function is called when IPv6 decide RA userspace options. if - * this function returns 1 then the option given by nd_opt_type will - * be handled as userspace option additional to the IPv6 options. - * * int (*parse_options)(const struct net_device *dev, * struct nd_opt_hdr *nd_opt, * struct ndisc_options *ndopts): @@ -200,7 +195,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, * addresses. E.g. 802.15.4 6LoWPAN. 
*/ struct ndisc_ops { - int (*is_useropt)(u8 nd_opt_type); int (*parse_options)(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts); @@ -224,15 +218,6 @@ struct ndisc_ops { }; #if IS_ENABLED(CONFIG_IPV6) -static inline int ndisc_ops_is_useropt(const struct net_device *dev, - u8 nd_opt_type) -{ - if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) - return dev->ndisc_ops->is_useropt(nd_opt_type); - else - return 0; -} - static inline int ndisc_ops_parse_options(const struct net_device *dev, struct nd_opt_hdr *nd_opt, struct ndisc_options *ndopts) -- cgit v1.2.3 From 75bab45e6b2da379fe2ebda48ed35f8ce371a2ef Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 7 Aug 2024 16:13:46 +0200 Subject: net: nexthop: Add flag to assert that NHGRP reserved fields are zero There are many unpatched kernel versions out there that do not initialize the reserved fields of struct nexthop_grp. The issue with that is that if those fields were to be used for some end (i.e. stop being reserved), old kernels would still keep sending random data through the field, and a new userspace could not rely on the value. In this patch, use the existing NHA_OP_FLAGS, which is currently inbound only, to carry flags back to the userspace. Add a flag to indicate that the reserved fields in struct nexthop_grp are zeroed before dumping. This is reliant on the actual fix from commit 6d745cd0e972 ("net: nexthop: Initialize all fields in dumped nexthops"). Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/21037748d4f9d8ff486151f4c09083bcf12d5df8.1723036486.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index dd8787f9cf39..f4f060a87cc2 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -33,6 +33,9 @@ enum { #define NHA_OP_FLAG_DUMP_STATS BIT(0) #define NHA_OP_FLAG_DUMP_HW_STATS BIT(1) +/* Response OP_FLAGS. */ +#define NHA_OP_FLAG_RESP_GRP_RESVD_0 BIT(31) /* Dump clears resvd fields. */ + enum { NHA_UNSPEC, NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ -- cgit v1.2.3 From b72a6a7ab9573e06d5c2fcb92eaa28614a735bfd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 7 Aug 2024 16:13:47 +0200 Subject: net: nexthop: Increase weight to u16 In CLOS networks, as link failures occur at various points in the network, ECMP weights of the involved nodes are adjusted to compensate. With high fan-out of the involved nodes, and overall high number of nodes, a (non-)ECMP weight ratio that we would like to configure does not fit into 8 bits. Instead of, say, 255:254, we might like to configure something like 1000:999. For these deployments, the 8-bit weight may not be enough. To that end, in this patch increase the next hop weight from u8 to u16. Increasing the width of an integral type can be tricky, because while the code still compiles, the types may not check out anymore, and numerical errors come up. To prevent this, the conversion was done in two steps. First the type was changed from u8 to a single-member structure, which invalidated all uses of the field. This allowed going through them one by one and audit for type correctness. Then the structure was replaced with a vanilla u16 again. This should ensure that no place was missed. 
The UAPI for configuring nexthop group members is that an attribute NHA_GROUP carries an array of struct nexthop_grp entries: struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ __u8 resvd1; __u16 resvd2; }; The field resvd1 is currently validated and required to be zero. We can lift this requirement and carry high-order bits of the weight in the reserved field: struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ __u8 weight_high; __u16 resvd2; }; Keeping the fields split this way was chosen in case an existing userspace makes assumptions about the width of the weight field, and to sidestep any endianness issues. The weight field is currently encoded as the weight value minus one, because weight of 0 is invalid. This same trick is impossible for the new weight_high field, because zero must mean actual zero. With this in place: - Old userspace is guaranteed to carry weight_high of 0, therefore configuring 8-bit weights as appropriate. When dumping nexthops with 16-bit weight, it would only show the lower 8 bits. But configuring such nexthops implies existence of userspace aware of the extension in the first place. - New userspace talking to an old kernel will work as long as it only attempts to configure 8-bit weights, where the high-order bits are zero. Old kernel will bounce attempts at configuring >8-bit weights. Renaming reserved fields as they are allocated for some purpose is commonly done in Linux. Whoever touches a reserved field is doing so at their own risk. nexthop_grp::resvd1 in particular is currently used by at least strace, however they carry an own copy of UAPI headers, and the conversion should be trivial. A helper is provided for decoding the weight out of the two fields. Forcing a conversion seems preferable to bending backwards and introducing anonymous unions or whatever. 
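As an illustration (not part of the patch), userspace wanting a 16-bit weight could fill the two fields like this, mirroring the decoding helper added to the UAPI header below; nexthop_grp_set_weight() is a hypothetical name:

	#include <linux/nexthop.h>

	/* hypothetical encoder, the inverse of nexthop_grp_weight() */
	static inline void nexthop_grp_set_weight(struct nexthop_grp *entry,
						  unsigned int weight)
	{
		unsigned int v = weight - 1;	/* weight 0 is invalid */

		entry->weight	   = v & 0xff;	/* low 8 bits */
		entry->weight_high = v >> 8;	/* 0 for weights <= 256 */
	}

For example, a weight of 1000 is stored as weight_high = 3, weight = 231; an old kernel, which still validates that byte as resvd1, bounces such a request, matching the compatibility behaviour described above.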
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/483e2fcf4beb0d9135d62e7d27b46fa2685479d4.1723036486.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 4 ++-- include/uapi/linux/nexthop.h | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 68463aebcc05..d9fb44e8b321 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,7 +105,7 @@ struct nh_grp_entry_stats { struct nh_grp_entry { struct nexthop *nh; struct nh_grp_entry_stats __percpu *stats; - u8 weight; + u16 weight; union { struct { @@ -192,7 +192,7 @@ struct nh_notifier_single_info { }; struct nh_notifier_grp_entry_info { - u8 weight; + u16 weight; struct nh_notifier_single_info nh; }; diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index f4f060a87cc2..bc49baf4a267 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -16,10 +16,15 @@ struct nhmsg { struct nexthop_grp { __u32 id; /* nexthop id - must exist */ __u8 weight; /* weight of this nexthop */ - __u8 resvd1; + __u8 weight_high; /* high order bits of weight */ __u16 resvd2; }; +static inline __u16 nexthop_grp_weight(const struct nexthop_grp *entry) +{ + return ((entry->weight_high << 8) | entry->weight) + 1; +} + enum { NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group * default type if not specified -- cgit v1.2.3 From 1da91ea87aefe2c25b68c9f96947a9271ba6325d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 14:12:01 -0400 Subject: introduce fd_file(), convert all accessors to it. For any changes of struct fd representation we need to turn existing accesses to fields into calls of wrappers. Accesses to struct fd::flags are very few (3 in linux/file.h, 1 in net/socket.c, 3 in fs/overlayfs/file.c and 3 more in explicit initializers). Those can be dealt with in the commit converting to new layout; accesses to struct fd::file are too many for that. This commit converts (almost) all of f.file to fd_file(f). It's not entirely mechanical ('file' is used as a member name more than just in struct fd) and it does not even attempt to distinguish the uses in pointer context from those in boolean context; the latter will be eventually turned into a separate helper (fd_empty()). NOTE: mass conversion to fd_empty(), tempting as it might be, is a bad idea; better do that piecewise in commit that convert from fdget...() to CLASS(...). 
[conflicts in fs/fhandle.c, kernel/bpf/syscall.c, mm/memcontrol.c caught by git; fs/stat.c one got caught by git grep] [fs/xattr.c conflict] Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/cleanup.h | 2 +- include/linux/file.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..a3d3e888cf1f 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -98,7 +98,7 @@ const volatile void * __must_check_fn(const volatile void *val) * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) * * CLASS(fdget, f)(fd); - * if (!f.file) + * if (!fd_file(f)) * return -EBADF; * * // use 'f' without concern diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739..0f3f369f2450 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -42,10 +42,12 @@ struct fd { #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 +#define fd_file(f) ((f).file) + static inline void fdput(struct fd fd) { if (fd.flags & FDPUT_FPUT) - fput(fd.file); + fput(fd_file(fd)); } extern struct file *fget(unsigned int fd); @@ -79,7 +81,7 @@ static inline struct fd fdget_pos(int fd) static inline void fdput_pos(struct fd f) { if (f.flags & FDPUT_POS_UNLOCK) - __f_unlock_pos(f.file); + __f_unlock_pos(fd_file(f)); fdput(f); } -- cgit v1.2.3 From 88a2f6468d013ca1163490dbddfc95135d1c27a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 15:45:12 -0400 Subject: struct fd: representation change We want the compiler to see that fdput() on empty instance is a no-op. The emptiness check is that file reference is NULL, while fdput() is "fput() if FDPUT_FPUT is present in flags". The reason why fdput() on empty instance is a no-op is something compiler can't see - it's that we never generate instances with NULL file reference combined with non-zero flags. It's not that hard to deal with - the real primitives behind fdget() et.al. are returning an unsigned long value, unpacked by (inlined) __to_fd() into the current struct file * + int. The lower bits are used to store flags, while the rest encodes the pointer. Linus suggested that keeping this unsigned long around with the extractions done by inlined accessors should generate a sane code and that turns out to be the case. Namely, turning struct fd into a struct-wrapped unsinged long, with fd_empty(f) => unlikely(f.word == 0) fd_file(f) => (struct file *)(f.word & ~3) fdput(f) => if (f.word & 1) fput(fd_file(f)) ends up with compiler doing the right thing. The cost is the patch footprint, of course - we need to switch f.file to fd_file(f) all over the tree, and it's not doable with simple search and replace; there are false positives, etc. Note that the sole member of that structure is an opaque unsigned long - all accesses should be done via wrappers and I don't want to use a name that would invite manual casts to file pointers, etc. The value of that member is equal either to (unsigned long)p | flags, p being an address of some struct file instance, or to 0 for an empty fd. For now the new predicate (fd_empty(f)) has no users; all the existing checks have form (!fd_file(f)). We will convert to fd_empty() use later; here we only define it (and tell the compiler that it's unlikely to return true). This commit only deals with representation change; there will be followups. 
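A minimal, hedged sketch of what a caller looks like once this and the preceding fd_file() conversion are in place (the function name is made up; existing code keeps the !fd_file() form until the later fd_empty() conversion):

	#include <linux/file.h>
	#include <linux/fs.h>

	static int example_fsync(unsigned int fd)	/* hypothetical caller */
	{
		struct fd f = fdget(fd);
		int ret;

		if (!fd_file(f))		/* empty instance: f.word == 0, nothing to put */
			return -EBADF;

		ret = vfs_fsync(fd_file(f), 0);	/* pointer recovered as f.word & ~3 */
		fdput(f);			/* fput() only if FDPUT_FPUT is set in the word */
		return ret;
	}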
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 0f3f369f2450..eb28469b1c16 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -35,18 +35,28 @@ static inline void fput_light(struct file *file, int fput_needed) fput(file); } +/* either a reference to struct file + flags + * (cloned vs. borrowed, pos locked), with + * flags stored in lower bits of value, + * or empty (represented by 0). + */ struct fd { - struct file *file; - unsigned int flags; + unsigned long word; }; #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 -#define fd_file(f) ((f).file) +#define fd_file(f) ((struct file *)((f).word & ~(FDPUT_FPUT|FDPUT_POS_UNLOCK))) +static inline bool fd_empty(struct fd f) +{ + return unlikely(!f.word); +} + +#define EMPTY_FD (struct fd){0} static inline void fdput(struct fd fd) { - if (fd.flags & FDPUT_FPUT) + if (fd.word & FDPUT_FPUT) fput(fd_file(fd)); } @@ -60,7 +70,7 @@ extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { - return (struct fd){(struct file *)(v & ~3),v & 3}; + return (struct fd){v}; } static inline struct fd fdget(unsigned int fd) @@ -80,7 +90,7 @@ static inline struct fd fdget_pos(int fd) static inline void fdput_pos(struct fd f) { - if (f.flags & FDPUT_POS_UNLOCK) + if (f.word & FDPUT_POS_UNLOCK) __f_unlock_pos(fd_file(f)); fdput(f); } -- cgit v1.2.3 From de12c3391bce10504c0e7bd767516c74110cfce1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 May 2024 16:34:25 -0400 Subject: add struct fd constructors, get rid of __to_fd() Make __fdget() et.al. return struct fd directly. New helpers: BORROWED_FD(file) and CLONED_FD(file), for borrowed and cloned file references resp. NOTE: this might need tuning; in particular, inline on __fget_light() is there to keep the code generation same as before - we probably want to keep it inlined in fdget() et.al. (especially so in fdget_pos()), but that needs profiling. 
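For illustration, a hedged sketch of the contract the two constructors express (fd_ctor_demo() is a made-up name):

	#include <linux/file.h>
	#include <linux/fs.h>

	static void fd_ctor_demo(struct file *file)
	{
		/* borrow: wrap a reference the caller goes on owning */
		struct fd borrowed = BORROWED_FD(file);

		/* clone: wrap a reference taken specifically for this struct fd */
		struct fd cloned = CLONED_FD(get_file(file));

		fdput(borrowed);	/* no-op, FDPUT_FPUT not set */
		fdput(cloned);		/* drops the reference taken above */
	}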
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/file.h | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index eb28469b1c16..aab1caff6713 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -53,6 +53,14 @@ static inline bool fd_empty(struct fd f) } #define EMPTY_FD (struct fd){0} +static inline struct fd BORROWED_FD(struct file *f) +{ + return (struct fd){(unsigned long)f}; +} +static inline struct fd CLONED_FD(struct file *f) +{ + return (struct fd){(unsigned long)f | FDPUT_FPUT}; +} static inline void fdput(struct fd fd) { @@ -63,30 +71,11 @@ static inline void fdput(struct fd fd) extern struct file *fget(unsigned int fd); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); -extern unsigned long __fdget(unsigned int fd); -extern unsigned long __fdget_raw(unsigned int fd); -extern unsigned long __fdget_pos(unsigned int fd); extern void __f_unlock_pos(struct file *); -static inline struct fd __to_fd(unsigned long v) -{ - return (struct fd){v}; -} - -static inline struct fd fdget(unsigned int fd) -{ - return __to_fd(__fdget(fd)); -} - -static inline struct fd fdget_raw(unsigned int fd) -{ - return __to_fd(__fdget_raw(fd)); -} - -static inline struct fd fdget_pos(int fd) -{ - return __to_fd(__fdget_pos(fd)); -} +struct fd fdget(unsigned int fd); +struct fd fdget_raw(unsigned int fd); +struct fd fdget_pos(unsigned int fd); static inline void fdput_pos(struct fd f) { -- cgit v1.2.3 From b9d104465a6c2f1bb27ae247bd90b4ab079a1699 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 11 Aug 2024 17:37:56 +0300 Subject: scsi: ufs: Prepare to add HCI capabilities sysfs Prepare so we'll be able to read various other HCI registers. While at it, fix the HCPID & HCMID register names to stand for what they really are. Also replace the pm_runtime_{get/put}_sync() calls in auto_hibern8_show to ufshcd_rpm_{get/put}_sync() as any host controller register reads should. Reviewed-by: Keoseong Park Reviewed-by: Bart Van Assche Reviewed-by: Bean Huo Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240811143757.2538212-2-avri.altman@wdc.com Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..194e3655902e 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -25,8 +25,9 @@ enum { REG_CONTROLLER_CAPABILITIES = 0x00, REG_MCQCAP = 0x04, REG_UFS_VERSION = 0x08, - REG_CONTROLLER_DEV_ID = 0x10, - REG_CONTROLLER_PROD_ID = 0x14, + REG_EXT_CONTROLLER_CAPABILITIES = 0x0C, + REG_CONTROLLER_PID = 0x10, + REG_CONTROLLER_MID = 0x14, REG_AUTO_HIBERNATE_IDLE_TIMER = 0x18, REG_INTERRUPT_STATUS = 0x20, REG_INTERRUPT_ENABLE = 0x24, -- cgit v1.2.3 From 12dbc67c3b0bcd7414b511fd8a42ba4e388705bc Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Wed, 7 Aug 2024 21:45:28 +0800 Subject: net: stmmac: Move the atds flag to the stmmac_dma_cfg structure ATDS (Alternate Descriptor Size) is a part of the DMA Bus Mode configs (together with PBL, ALL, EME, etc) of the DW GMAC controllers. 
Seeing it's not changed at runtime but is activated as long as the IP-core has it supported (at least due to the Type 2 Full Checksum Offload Engine feature), move the respective parameter from the stmmac_dma_ops::init() callback argument to the stmmac_dma_cfg structure, which already have the rest of the DMA-related configs defined. Besides the being added in the next commit DW GMAC multi-channels support will require to add the stmmac_dma_ops::init_chan() callback and have the ATDS flag set/cleared for each channel in there. Having the atds-flag in the stmmac_dma_cfg structure will make the parameter accessible from stmmac_dma_ops::init_chan() callback too. Signed-off-by: Feiyang Chen Signed-off-by: Yinggang Gu Reviewed-by: Serge Semin Acked-by: Huacai Chen Signed-off-by: Yanteng Si Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 84e13bd5df28..338991c08f00 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -100,6 +100,7 @@ struct stmmac_dma_cfg { bool eame; bool multi_msi_en; bool dche; + bool atds; }; #define AXI_BLEN 7 -- cgit v1.2.3 From c343e66ed009fdb6206787558130ccbd504ffc12 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Aug 2024 22:52:17 +0200 Subject: usb: gadget: configfs: Make check_user_usb_string() static "linux/usb/gadget_configfs.h" is only included in "drivers/usb/gadget/configfs.c", so there is no need to declare a function in the header file. it is only used in this .c file. It's better to have it static. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/958cb49dca1bff4254a3492c018efbf3b01918b4.1723323107.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget_configfs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d61aebd68128..6a552dd4dec9 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -4,9 +4,6 @@ #include -int check_user_usb_string(const char *name, - struct usb_gadget_strings *stringtab_dev); - #define GS_STRINGS_W(__struct, __name) \ static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ -- cgit v1.2.3 From d1e14e06810a96ef0cdabf572513594031d4dad6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Aug 2024 23:05:46 +0200 Subject: usb: gadget: configfs: Constify struct config_item_type 'struct config_item_type' is not modified in this file. Apparently, these structures are only used with config_group_init_type_name() which takes a const struct config_item_type* as a 3rd argument. Constifying this structure moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers. 
On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 40834 5112 64 46010 b3ba drivers/usb/gadget/configfs.o After: ===== text data bss dec hex filename 41218 4728 64 46010 b3ba drivers/usb/gadget/configfs.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/513223e97082e1bb758e36d55c175ec9ea34a71c.1723323896.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget_configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index 6a552dd4dec9..6b5d6838f865 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -34,7 +34,7 @@ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ }; \ \ -static struct config_item_type struct_in##_langid_type = { \ +static const struct config_item_type struct_in##_langid_type = { \ .ct_item_ops = &struct_in##_langid_item_ops, \ .ct_attrs = struct_in##_langid_attrs, \ .ct_owner = THIS_MODULE, \ @@ -91,7 +91,7 @@ static struct configfs_group_operations struct_in##_strings_ops = { \ .drop_item = &struct_in##_strings_drop, \ }; \ \ -static struct config_item_type struct_in##_strings_type = { \ +static const struct config_item_type struct_in##_strings_type = { \ .ct_group_ops = &struct_in##_strings_ops, \ .ct_owner = THIS_MODULE, \ } -- cgit v1.2.3 From c26cee817f8bd9a22bfade20f739ec2fc6f20221 Mon Sep 17 00:00:00 2001 From: David Sands Date: Sat, 10 Aug 2024 20:00:05 -0400 Subject: usb: gadget: f_fs: add capability for dfu functional descriptor Add the ability for the USB FunctionFS (FFS) gadget driver to be able to create Device Firmware Upgrade (DFU) functional descriptors. [1] This patch allows implementation of DFU in userspace using the FFS gadget. The DFU protocol uses the control pipe (ep0) for all messaging so only the addition of the DFU functional descriptor is needed in the kernel driver. The DFU functional descriptor is written to the ep0 file along with any other descriptors during FFS setup. DFU requires an interface descriptor followed by the DFU functional descriptor. This patch includes documentation of the added descriptor for DFU and conversion of some existing documentation to kernel-doc format so that it can be included in the generated docs. An implementation of DFU 1.1 that implements just the runtime descriptor using the FunctionFS gadget (with rebooting into u-boot for DFU mode) has been tested on an i.MX8 Nano. An implementation of DFU 1.1 that implements both runtime and DFU mode using the FunctionFS gadget has been tested on Xilinx Zynq UltraScale+. Note that for the best performance of firmware update file transfers, the userspace program should respond as quick as possible to the setup packets. 
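For illustration only, a hedged sketch of how userspace might lay out a runtime DFU function's full-speed descriptors before writing them to ep0 (the FunctionFS v2 header, descriptor counts and strings are omitted; the attribute, timeout and transfer-size values are arbitrary):

	#include <endian.h>
	#include <linux/usb/ch9.h>
	#include <linux/usb/functionfs.h>

	static const struct {
		struct usb_interface_descriptor intf;
		struct usb_dfu_functional_descriptor dfu;
	} __attribute__((packed)) fs_descs = {
		.intf = {
			.bLength		= sizeof(fs_descs.intf),
			.bDescriptorType	= USB_DT_INTERFACE,
			.bNumEndpoints		= 0,	/* DFU uses only ep0 */
			.bInterfaceClass	= USB_CLASS_APP_SPEC,
			.bInterfaceSubClass	= USB_SUBCLASS_DFU,
			.bInterfaceProtocol	= 1,	/* DFU runtime protocol */
		},
		.dfu = {
			.bLength		= sizeof(fs_descs.dfu),
			.bDescriptorType	= USB_DT_DFU_FUNCTIONAL,
			.bmAttributes		= DFU_FUNC_ATT_CAN_DOWNLOAD |
						  DFU_FUNC_ATT_WILL_DETACH,
			.wDetachTimeOut		= htole16(5000),
			.wTransferSize		= htole16(4096),
			.bcdDFUVersion		= htole16(0x0110),
		},
	};

This blob goes into the usual fs_descrs/hs_descrs areas of the FunctionFS descriptors write; the DFU protocol itself is then handled entirely in userspace over ep0 setup packets.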
[1] https://www.usb.org/sites/default/files/DFU_1.1.pdf Signed-off-by: David Sands Co-developed-by: Chris Wulff Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240811000004.1395888-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/ch9.h | 8 ++- include/uapi/linux/usb/functionfs.h | 97 ++++++++++++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 44d73ba8788d..91f0f7e214a5 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -254,6 +254,9 @@ struct usb_ctrlrequest { #define USB_DT_DEVICE_CAPABILITY 0x10 #define USB_DT_WIRELESS_ENDPOINT_COMP 0x11 #define USB_DT_WIRE_ADAPTER 0x21 +/* From USB Device Firmware Upgrade Specification, Revision 1.1 */ +#define USB_DT_DFU_FUNCTIONAL 0x21 +/* these are from the Wireless USB spec */ #define USB_DT_RPIPE 0x22 #define USB_DT_CS_RADIO_CONTROL 0x23 /* From the T10 UAS specification */ @@ -329,9 +332,10 @@ struct usb_device_descriptor { #define USB_CLASS_USB_TYPE_C_BRIDGE 0x12 #define USB_CLASS_MISC 0xef #define USB_CLASS_APP_SPEC 0xfe -#define USB_CLASS_VENDOR_SPEC 0xff +#define USB_SUBCLASS_DFU 0x01 -#define USB_SUBCLASS_VENDOR_SPEC 0xff +#define USB_CLASS_VENDOR_SPEC 0xff +#define USB_SUBCLASS_VENDOR_SPEC 0xff /*-------------------------------------------------------------------------*/ diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 9f88de9c3d66..2ebdba111a8f 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -3,6 +3,7 @@ #define _UAPI__LINUX_FUNCTIONFS_H__ +#include #include #include @@ -37,6 +38,31 @@ struct usb_endpoint_descriptor_no_audio { __u8 bInterval; } __attribute__((packed)); +/** + * struct usb_dfu_functional_descriptor - DFU Functional descriptor + * @bLength: Size of the descriptor (bytes) + * @bDescriptorType: USB_DT_DFU_FUNCTIONAL + * @bmAttributes: DFU attributes + * @wDetachTimeOut: Maximum time to wait after DFU_DETACH (ms, le16) + * @wTransferSize: Maximum number of bytes per control-write (le16) + * @bcdDFUVersion: DFU Spec version (BCD, le16) + */ +struct usb_dfu_functional_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bmAttributes; + __le16 wDetachTimeOut; + __le16 wTransferSize; + __le16 bcdDFUVersion; +} __attribute__ ((packed)); + +/* from DFU functional descriptor bmAttributes */ +#define DFU_FUNC_ATT_CAN_DOWNLOAD _BITUL(0) +#define DFU_FUNC_ATT_CAN_UPLOAD _BITUL(1) +#define DFU_FUNC_ATT_MANIFEST_TOLERANT _BITUL(2) +#define DFU_FUNC_ATT_WILL_DETACH _BITUL(3) + + struct usb_functionfs_descs_head_v2 { __le32 magic; __le32 length; @@ -104,23 +130,38 @@ struct usb_ffs_dmabuf_transfer_req { #ifndef __KERNEL__ -/* +/** + * DOC: descriptors + * * Descriptors format: * + * +-----+-----------+--------------+--------------------------------------+ * | off | name | type | description | - * |-----+-----------+--------------+--------------------------------------| + * +-----+-----------+--------------+--------------------------------------+ * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC_V2 | + * +-----+-----------+--------------+--------------------------------------+ * | 4 | length | LE32 | length of the whole data chunk | + * +-----+-----------+--------------+--------------------------------------+ * | 8 | flags | LE32 | combination of functionfs_flags | + * +-----+-----------+--------------+--------------------------------------+ * | 
| eventfd | LE32 | eventfd file descriptor | + * +-----+-----------+--------------+--------------------------------------+ * | | fs_count | LE32 | number of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_count | LE32 | number of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | ss_count | LE32 | number of super-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | os_count | LE32 | number of MS OS descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | fs_descrs | Descriptor[] | list of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_descrs | Descriptor[] | list of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | ss_descrs | Descriptor[] | list of super-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | os_descrs | OSDesc[] | list of MS OS descriptors | + * +-----+-----------+--------------+--------------------------------------+ * * Depending on which flags are set, various fields may be missing in the * structure. Any flags that are not recognised cause the whole block to be @@ -128,71 +169,111 @@ struct usb_ffs_dmabuf_transfer_req { * * Legacy descriptors format (deprecated as of 3.14): * + * +-----+-----------+--------------+--------------------------------------+ * | off | name | type | description | - * |-----+-----------+--------------+--------------------------------------| + * +-----+-----------+--------------+--------------------------------------+ * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC | + * +-----+-----------+--------------+--------------------------------------+ * | 4 | length | LE32 | length of the whole data chunk | + * +-----+-----------+--------------+--------------------------------------+ * | 8 | fs_count | LE32 | number of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | 12 | hs_count | LE32 | number of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | 16 | fs_descrs | Descriptor[] | list of full-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * | | hs_descrs | Descriptor[] | list of high-speed descriptors | + * +-----+-----------+--------------+--------------------------------------+ * * All numbers must be in little endian order. 
* * Descriptor[] is an array of valid USB descriptors which have the following * format: * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | bLength | U8 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 1 | bDescriptorType | U8 | descriptor type | + * +-----+-----------------+------+--------------------------+ * | 2 | payload | | descriptor's payload | + * +-----+-----------------+------+--------------------------+ * * OSDesc[] is an array of valid MS OS Feature Descriptors which have one of * the following formats: * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | inteface | U8 | related interface number | + * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 5 | bcdVersion | U16 | currently supported: 1 | + * +-----+-----------------+------+--------------------------+ * | 7 | wIndex | U16 | currently supported: 4 | + * +-----+-----------------+------+--------------------------+ * | 9 | bCount | U8 | number of ext. compat. | + * +-----+-----------------+------+--------------------------+ * | 10 | Reserved | U8 | 0 | + * +-----+-----------------+------+--------------------------+ * | 11 | ExtCompat[] | | list of ext. compat. d. | + * +-----+-----------------+------+--------------------------+ * + * +-----+-----------------+------+--------------------------+ * | off | name | type | description | - * |-----+-----------------+------+--------------------------| + * +-----+-----------------+------+--------------------------+ * | 0 | inteface | U8 | related interface number | + * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | + * +-----+-----------------+------+--------------------------+ * | 5 | bcdVersion | U16 | currently supported: 1 | + * +-----+-----------------+------+--------------------------+ * | 7 | wIndex | U16 | currently supported: 5 | + * +-----+-----------------+------+--------------------------+ * | 9 | wCount | U16 | number of ext. compat. | + * +-----+-----------------+------+--------------------------+ * | 11 | ExtProp[] | | list of ext. prop. d. 
| + * +-----+-----------------+------+--------------------------+ * * ExtCompat[] is an array of valid Extended Compatiblity descriptors * which have the following format: * + * +-----+-----------------------+------+-------------------------------------+ * | off | name | type | description | - * |-----+-----------------------+------+-------------------------------------| + * +-----+-----------------------+------+-------------------------------------+ * | 0 | bFirstInterfaceNumber | U8 | index of the interface or of the 1st| + * +-----+-----------------------+------+-------------------------------------+ * | | | | interface in an IAD group | + * +-----+-----------------------+------+-------------------------------------+ * | 1 | Reserved | U8 | 1 | + * +-----+-----------------------+------+-------------------------------------+ * | 2 | CompatibleID | U8[8]| compatible ID string | + * +-----+-----------------------+------+-------------------------------------+ * | 10 | SubCompatibleID | U8[8]| subcompatible ID string | + * +-----+-----------------------+------+-------------------------------------+ * | 18 | Reserved | U8[6]| 0 | + * +-----+-----------------------+------+-------------------------------------+ * * ExtProp[] is an array of valid Extended Properties descriptors * which have the following format: * + * +-----+-----------------------+------+-------------------------------------+ * | off | name | type | description | - * |-----+-----------------------+------+-------------------------------------| + * +-----+-----------------------+------+-------------------------------------+ * | 0 | dwSize | U32 | length of the descriptor | + * +-----+-----------------------+------+-------------------------------------+ * | 4 | dwPropertyDataType | U32 | 1..7 | + * +-----+-----------------------+------+-------------------------------------+ * | 8 | wPropertyNameLength | U16 | bPropertyName length (NL) | + * +-----+-----------------------+------+-------------------------------------+ * | 10 | bPropertyName |U8[NL]| name of this property | + * +-----+-----------------------+------+-------------------------------------+ * |10+NL| dwPropertyDataLength | U32 | bPropertyData length (DL) | + * +-----+-----------------------+------+-------------------------------------+ * |14+NL| bProperty |U8[DL]| payload of this property | + * +-----+-----------------------+------+-------------------------------------+ */ struct usb_functionfs_strings_head { -- cgit v1.2.3 From 92567a5f92bc947fb7aa4351979db1b7b71a554c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:06:19 +0800 Subject: iommu: Remove unused declaration iommu_sva_unbind_gpasid() Commit 0c9f17877891 ("iommu: Remove guest pasid related interfaces and definitions") removed the implementation but leave declaration. 
Signed-off-by: Yue Haibing Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240808140619.2498535-1-yuehaibing@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311..04cbdae0052e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, -- cgit v1.2.3 From 1ef33652d22c69a2a7519d003cd6c79b8e514f44 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Aug 2024 05:25:07 -0700 Subject: net: netpoll: extract core of netpoll_cleanup Extract the core part of netpoll_cleanup(), so, it could be called from a caller that has the rtnl lock already. Netconsole uses this in a weird way right now: __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; This will be replaced by do_netpoll_cleanup() as the locking situation is overhauled. Signed-off-by: Breno Leitao Reviewed-by: Rik van Riel Signed-off-by: Paolo Abeni --- include/linux/netpoll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index bd19c4b91e31..cd4e28db0cbd 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -64,6 +64,7 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); +void do_netpoll_cleanup(struct netpoll *np); netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL -- cgit v1.2.3 From 844efaef48e846b5310b0e7af5e7578762eb7e8d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 22 Apr 2024 17:33:38 +0300 Subject: drm: fixed: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240422143338.2026791-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jani Nikula --- include/drm/drm_fixed.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index ef8bc8d72039..1922188f00e8 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -25,8 +25,9 @@ #ifndef DRM_FIXED_H #define DRM_FIXED_H -#include #include +#include +#include typedef union dfixed { u32 full; -- cgit v1.2.3 From 58a63729c957621f1990c3494c702711188ca347 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 9 Aug 2024 08:58:58 -0700 Subject: net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings After napi_complete_done() is called when NAPI is polling in the current process context, another NAPI may be scheduled and start running in softirq on another CPU and may ring the doorbell before the current CPU does. 
When combined with unnecessary rings when there is no need to arm the CQ, it triggers error paths in the hardware. This patch fixes this by calling napi_complete_done() after doorbell rings. It limits the number of unnecessary rings when there is no need to arm. MANA hardware specifies that there must be one doorbell ring every 8 CQ wraparounds. This driver guarantees one doorbell ring as soon as the number of consumed CQEs exceeds 4 CQ wraparounds. In practical workloads, the 4 CQ wraparounds proves to be big enough that it rarely exceeds this limit before all the napi weight is consumed. To implement this, add a per-CQ counter cq->work_done_since_doorbell, and make sure the CQ is armed as soon as passing 4 wraparounds of the CQ. Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Reviewed-by: Haiyang Zhang Signed-off-by: Long Li Link: https://patch.msgid.link/1723219138-29887-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b..7caa334f4888 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; -- cgit v1.2.3 From 779bac9994452f6a894524f70c00cfb0cd4b6364 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:08:04 +0200 Subject: Revert "ACPI: EC: Evaluate orphan _REG under EC device" This reverts commit 0e6b6dedf168 ("Revert "ACPI: EC: Evaluate orphan _REG under EC device") because the problem addressed by it will be addressed differently in what follows. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/3236716.5fSG56mABF@rjwysocki.net --- include/acpi/acpixf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..94d0fc3bd412 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,10 +662,6 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, -- cgit v1.2.3 From cdf65d73e001fde600b18d7e45afadf559425ce5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Aug 2024 15:11:42 +0200 Subject: ACPICA: Add a depth argument to acpi_execute_reg_methods() A subsequent change will need to pass a depth argument to acpi_execute_reg_methods(), so prepare that function for it. No intentional functional changes. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Hans de Goede Cc: All applicable Link: https://patch.msgid.link/8451567.NyiUUSuA9g@rjwysocki.net --- include/acpi/acpixf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..9f1c1d225e32 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,6 +660,7 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status -- cgit v1.2.3 From dde286ee57704226b500cb9eb59547fec07aad3d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 8 Aug 2024 15:36:28 +0300 Subject: regmap: Allow setting IRQ domain name suffix When multiple IRQ domains are created from the same device-tree node they will get the same name based on the device-tree path. This will cause a naming collision in debugFS when IRQ domain specific entries are created. The regmap-IRQ creates per instance IRQ domains. This will lead to a domain name conflict when a device which provides more than one interrupt line uses the regmap-IRQ. Add support for specifying an IRQ domain name suffix when creating a regmap-IRQ controller. Signed-off-by: Matti Vaittinen Link: https://patch.msgid.link/776bc4996969e5081bcf61b9bdb5517e537147a3.1723120028.git.mazziesaccount@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 122e38161acb..f9ccad32fc5c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1521,6 +1521,9 @@ struct regmap_irq_chip_data; * struct regmap_irq_chip - Description of a generic regmap irq_chip. * * @name: Descriptive name for IRQ controller. + * @domain_suffix: Name suffix to be appended to end of IRQ domain name. Needed + * when multiple regmap-IRQ controllers are created from same + * device. * * @main_status: Base main status register address. For chips which have * interrupts arranged in separate sub-irq blocks with own IRQ @@ -1606,6 +1609,7 @@ struct regmap_irq_chip_data; */ struct regmap_irq_chip { const char *name; + const char *domain_suffix; unsigned int main_status; unsigned int num_main_status_bits; -- cgit v1.2.3 From 2a0629834cd82f05d424bbc193374f9a43d1f87d Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 9 Aug 2024 11:16:28 +0800 Subject: vfs: Don't evict inode under the inode lru traversing context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inode reclaiming process(See function prune_icache_sb) collects all reclaimable inodes and mark them with I_FREEING flag at first, at that time, other processes will be stuck if they try getting these inodes (See function find_inode_fast), then the reclaiming process destroy the inodes by function dispose_list(). Some filesystems(eg. ext4 with ea_inode feature, ubifs with xattr) may do inode lookup in the inode evicting callback function, if the inode lookup is operated under the inode lru traversing context, deadlock problems may happen. Case 1: In function ext4_evict_inode(), the ea inode lookup could happen if ea_inode feature is enabled, the lookup process will be stuck under the evicting context like this: 1. File A has inode i_reg and an ea inode i_ea 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea 3. 
Then, following three processes running like this: PA PB echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // i_reg is added into lru, lru->i_ea->i_reg prune_icache_sb list_lru_walk_one inode_lru_isolate i_ea->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(i_reg) spin_unlock(&i_reg->i_lock) spin_unlock(lru_lock) rm file A i_reg->nlink = 0 iput(i_reg) // i_reg->nlink is 0, do evict ext4_evict_inode ext4_xattr_delete_inode ext4_xattr_inode_dec_ref_all ext4_xattr_inode_iget ext4_iget(i_ea->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(i_ea) ----→ AA deadlock dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&i_ea->i_state) Case 2: In deleted inode writing function ubifs_jnl_write_inode(), file deleting process holds BASEHD's wbuf->io_mutex while getting the xattr inode, which could race with inode reclaiming process(The reclaiming process could try locking BASEHD's wbuf->io_mutex in inode evicting function), then an ABBA deadlock problem would happen as following: 1. File A has inode ia and a xattr(with inode ixa), regular file B has inode ib and a xattr. 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa 3. Then, following three processes running like this: PA PB PC echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // ib and ia are added into lru, lru->ixa->ib->ia prune_icache_sb list_lru_walk_one inode_lru_isolate ixa->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(ib) spin_unlock(&ib->i_lock) spin_unlock(lru_lock) rm file B ib->nlink = 0 rm file A iput(ia) ubifs_evict_inode(ia) ubifs_jnl_delete_inode(ia) ubifs_jnl_write_inode(ia) make_reservation(BASEHD) // Lock wbuf->io_mutex ubifs_iget(ixa->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(ixa) | iput(ib) // ib->nlink is 0, do evict | ubifs_evict_inode | ubifs_jnl_delete_inode(ib) ↓ ubifs_jnl_write_inode ABBA deadlock ←-----make_reservation(BASEHD) dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&ixa->i_state) Fix the possible deadlock by using new inode state flag I_LRU_ISOLATING to pin the inode in memory while inode_lru_isolate() reclaims its pages instead of using ordinary inode reference. This way inode deletion cannot be triggered from inode_lru_isolate() thus avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to be cleared before proceeding with inode cleanup. Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022 Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files") Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Suggested-by: Mateusz Guzik Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..fb0426f349fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? 
*/ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From bcc954c6caba01fca143162d5fbb90e46aa1ad80 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Mon, 12 Aug 2024 16:27:03 +0900 Subject: printk/panic: Allow cpu backtraces to be written into ringbuffer during panic commit 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") disabled non-panic CPUs to further write messages to ringbuffer after panicked. Since the commit, non-panicked CPU's are not allowed to write to ring buffer after panicked and CPU backtrace which is triggered after panicked to sample non-panicked CPUs' backtrace no longer serves its function as it has nothing to print. Fix the issue by allowing non-panicked CPUs to write into ringbuffer while CPU backtrace is in flight. Fixes: 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") Signed-off-by: Ryo Takakura Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240812072703.339690-1-takakura@valinux.co.jp Signed-off-by: Petr Mladek --- include/linux/panic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -- cgit v1.2.3 From 6c22aadbf6fd0240181eb4897308153c2aabec2a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:28 +0200 Subject: drm/fbdev-helper: Remove drm_fb_helper_output_poll_changed() The function is unused. Remove it. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-8-tzimmermann@suse.de --- include/drm/drm_fb_helper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 375737fd6c36..699f2790b9ac 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -271,9 +271,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); int drm_fb_helper_debug_leave(struct fb_info *info); - void drm_fb_helper_lastclose(struct drm_device *dev); -void drm_fb_helper_output_poll_changed(struct drm_device *dev); #else static inline void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, @@ -401,10 +399,6 @@ static inline int drm_fb_helper_debug_leave(struct fb_info *info) static inline void drm_fb_helper_lastclose(struct drm_device *dev) { } - -static inline void drm_fb_helper_output_poll_changed(struct drm_device *dev) -{ -} #endif #endif -- cgit v1.2.3 From b5757a5be2fac24f5c138e8ddb3b2c7be8ba1cb3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:29 +0200 Subject: drm: Remove struct drm_driver.lastclose The lastclose callback in struct drm_driver is unused. Remove it. 
Also update documentation. v2: - update to use drm_lastclose() - fix typo in documentation Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-9-tzimmermann@suse.de --- include/drm/drm_drv.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index cd37936c3926..02ea4e3248fd 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -228,34 +228,6 @@ struct drm_driver { */ void (*postclose) (struct drm_device *, struct drm_file *); - /** - * @lastclose: - * - * Called when the last &struct drm_file has been closed and there's - * currently no userspace client for the &struct drm_device. - * - * Modern drivers should only use this to force-restore the fbdev - * framebuffer using drm_fb_helper_restore_fbdev_mode_unlocked(). - * Anything else would indicate there's something seriously wrong. - * Modern drivers can also use this to execute delayed power switching - * state changes, e.g. in conjunction with the :ref:`vga_switcheroo` - * infrastructure. - * - * This is called after @postclose hook has been called. - * - * NOTE: - * - * All legacy drivers use this callback to de-initialize the hardware. - * This is purely because of the shadow-attach model, where the DRM - * kernel driver does not really own the hardware. Instead ownershipe is - * handled with the help of userspace through an inheritedly racy dance - * to set/unset the VT into raw mode. - * - * Legacy drivers initialize the hardware in the @firstopen callback, - * which isn't even called for modern drivers. - */ - void (*lastclose) (struct drm_device *); - /** * @unload: * -- cgit v1.2.3 From 446d0f4849b101bfc35c0d00835c3e3a4804616d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:30 +0200 Subject: drm: Remove struct drm_mode_config_funcs.output_poll_changed The output_poll_changed hook in struct drm_mode_config_funcs is unused. Remove it. The helper drm_client_dev_hotplug() implements the callback's functionality. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-10-tzimmermann@suse.de --- include/drm/drm_mode_config.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..271765e2e9f2 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -97,22 +97,6 @@ struct drm_mode_config_funcs { */ const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd); - /** - * @output_poll_changed: - * - * Callback used by helpers to inform the driver of output configuration - * changes. - * - * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() - * to call this hook to inform the fbdev helper of output changes. - * - * This hook is deprecated, drivers should instead implement fbdev - * support with struct drm_client, which takes care of any necessary - * hotplug event forwarding already without further involvement by - * the driver. 
- */ - void (*output_poll_changed)(struct drm_device *dev); - /** * @mode_valid: * -- cgit v1.2.3 From 5ddb0a8aa8e4754a8fb77e284e0d6f46c2350f88 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Tue, 6 Aug 2024 19:29:48 +0530 Subject: drm/mipi-dsi: add more multi functions for better error handling Add more functions that can benefit from being multi style and mark older variants as deprecated to eventually convert all mipi_dsi functions to multi style. Acked-by: Maxime Ripard Signed-off-by: Tejas Vipin Reviewed-by: Douglas Anderson Acked-by: Jessica Zhang [dianders: Fixed whitespace warning when applying] Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240806135949.468636-2-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 0f520eeeaa8e..b78aae45cae7 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -365,6 +365,16 @@ void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, enum mipi_dsi_dcs_tear_mode mode); +void mipi_dsi_turn_on_peripheral_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_soft_reset_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_brightness_multi(struct mipi_dsi_multi_context *ctx, + u16 brightness); +void mipi_dsi_dcs_set_pixel_format_multi(struct mipi_dsi_multi_context *ctx, + u8 format); +void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); +void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.2.3 From ec0a7d44b358afaaf52856d03c72e20587bc888b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 15:28:25 -0700 Subject: workqueue: Add interface for user-defined workqueue lockdep map Add an interface for a user-defined workqueue lockdep map, which is helpful when multiple workqueues are created for the same purpose. This also helps avoid leaking lockdep maps on each workqueue creation. v2: - Add alloc_workqueue_lockdep_map (Tejun) v3: - Drop __WQ_USER_OWNED_LOCKDEP (Tejun) - static inline alloc_ordered_workqueue_lockdep_map (Tejun) Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4eb8f9563136..8ccbf510880b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -507,6 +507,58 @@ void workqueue_softirq_dead(unsigned int cpu); __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +#ifdef CONFIG_LOCKDEP +/** + * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @lockdep_map: user-defined lockdep_map + * @...: args for @fmt + * + * Same as alloc_workqueue but with the a user-define lockdep_map. 
Useful for + * workqueues created with the same purpose and to avoid leaking a lockdep_map + * on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 5) struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, + struct lockdep_map *lockdep_map, ...); + +/** + * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with + * user-defined lockdep_map + * + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @lockdep_map: user-defined lockdep_map + * @args: args for @fmt + * + * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. + * Useful for workqueues created with the same purpose and to avoid leaking a + * lockdep_map on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 4) static inline struct workqueue_struct * +alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, + struct lockdep_map *lockdep_map, ...) +{ + struct workqueue_struct *wq; + va_list args; + + va_start(args, lockdep_map); + wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, + 1, lockdep_map, args); + va_end(args); + + return wq; +} +#endif + /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue -- cgit v1.2.3 From fb4dae4ca315fe48f1a8a452172f29196b2a7f3d Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Thu, 25 Jul 2024 16:08:05 -0600 Subject: virtchnl: support raw packet in protocol header The patch extends existing virtchnl_proto_hdrs structure to allow VF to pass a pair of buffers as packet data and mask that describe a match pattern of a filter rule. Then the kernel PF driver is requested to parse the pair of buffer and figure out low level hardware metadata (ptype, profile, field vector.. ) to program the expected FDIR or RSS rules. Reviewed-by: Simon Horman Reviewed-by: Marcin Szycik Signed-off-by: Qi Zhang Signed-off-by: Junfeng Guo Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 8e177b67e82f..4f78a65e33dc 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1121,6 +1121,7 @@ enum virtchnl_vfr_states { }; #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define VIRTCHNL_MAX_SIZE_RAW_PACKET 1024 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) #define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) @@ -1266,13 +1267,22 @@ struct virtchnl_proto_hdrs { u8 pad[3]; /** * specify where protocol header start from. + * must be 0 when sending a raw packet request. * 0 - from the outer layer * 1 - from the first inner layer * 2 - from the second inner layer * .... 
**/ int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ - struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + union { + struct virtchnl_proto_hdr + proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + struct { + u16 pkt_len; + u8 spec[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + u8 mask[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + } raw; + }; }; VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); -- cgit v1.2.3 From 623122ac1c4074791ea319df4676fb2875817fe4 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 25 Jul 2024 16:08:09 -0600 Subject: iavf: add support for offloading tc U32 cls filters Add support for offloading cls U32 filters. Only "skbedit queue_mapping" and "drop" actions are supported. Also, only "ip" and "802_3" tc protocols are allowed. The PF must advertise the VIRTCHNL_VF_OFFLOAD_TC_U32 capability flag. Since the filters will be enabled via the FD stage at the PF, a new type of FDIR filters is added and the existing list and state machine are used. The new filters can be used to configure flow directors based on raw (binary) pattern in the rx packet. Examples: 0. # tc qdisc add dev enp175s0v0 ingress 1. Redirect UDP from src IP 192.168.2.1 to queue 12: # tc filter add dev protocol ip ingress u32 \ match u32 0x45000000 0xff000000 at 0 \ match u32 0x00110000 0x00ff0000 at 8 \ match u32 0xC0A80201 0xffffffff at 12 \ match u32 0x00000000 0x00000000 at 24 \ action skbedit queue_mapping 12 skip_sw 2. Drop all ICMP: # tc filter add dev protocol ip ingress u32 \ match u32 0x45000000 0xff000000 at 0 \ match u32 0x00010000 0x00ff0000 at 8 \ match u32 0x00000000 0x00000000 at 24 \ action drop skip_sw 3. Redirect ICMP traffic from MAC 3c:fd:fe:a5:47:e0 to queue 7 (note proto: 802_3): # tc filter add dev protocol 802_3 ingress u32 \ match u32 0x00003CFD 0x0000ffff at 4 \ match u32 0xFEA547E0 0xffffffff at 8 \ match u32 0x08004500 0xffffff00 at 12 \ match u32 0x00000001 0x000000ff at 20 \ match u32 0x0000 0x0000 at 40 \ action skbedit queue_mapping 7 skip_sw Notes on matches: 1 - All intermediate fields that are needed to parse the correct PTYPE must be provided (in e.g. 3: Ethernet Type 0x0800 in MAC, IP version and IP length: 0x45 and protocol: 0x01 (ICMP)). 2 - The last match must provide an offset that guarantees all required headers are accounted for, even if the last header is not matched. For example, in #2, the last match is 4 bytes at offset 24 starting from IP header, so the total is 14 (MAC) + 24 + 4 = 42, which is the sum of MAC+IP+ICMP headers. Reviewed-by: Sridhar Samudrala Reviewed-by: Marcin Szycik Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 4f78a65e33dc..f41395264dca 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -247,6 +247,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) #define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) +#define VIRTCHNL_VF_OFFLOAD_TC_U32 BIT(11) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) -- cgit v1.2.3 From 55f325958ccc41eaea43eb4546d4dc77c1b5ef8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 30 Jul 2024 01:16:04 -0400 Subject: bpf: switch maps to CLASS(fd, ...) 
Calling conventions for __bpf_map_get() would be more convenient if it left fpdut() on failure to callers. Makes for simpler logics in the callers. Among other things, the proof of memory safety no longer has to rely upon file->private_data never being ERR_PTR(...) for bpffs files. Original calling conventions made it impossible for the caller to tell whether __bpf_map_get() has returned ERR_PTR(-EINVAL) because it has found the file not be a bpf map one (in which case it would've done fdput()) or because it found that ERR_PTR(-EINVAL) in file->private_data of a bpf map file (in which case fdput() would _not_ have been done). Signed-off-by: Al Viro Reviewed-by: Christian Brauner Signed-off-by: Andrii Nakryiko --- include/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9425e410bcb..9f35df07e86d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2241,7 +2241,16 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); -struct bpf_map *__bpf_map_get(struct fd f); + +static inline struct bpf_map *__bpf_map_get(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); -- cgit v1.2.3 From 03a979b74dc1ad5aeed8026a84d8771842cb1631 Mon Sep 17 00:00:00 2001 From: Yunke Cao Date: Wed, 14 Aug 2024 11:06:43 +0900 Subject: media: videobuf2-core: attach once if multiple planes share the same dbuf When multiple planes use the same dma buf, each plane will have its own dma buf attachment and mapping. It is a waste of IOVA space. This patch adds a dbuf_duplicated boolean in vb2_plane. If a plane's dbuf is the same as an existing plane, do not create another attachment and mapping. Signed-off-by: Yunke Cao Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil --- include/media/videobuf2-core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 955237ac503d..9b02aeba4108 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -154,6 +154,8 @@ struct vb2_mem_ops { * @mem_priv: private data with this plane. * @dbuf: dma_buf - shared buffer object. * @dbuf_mapped: flag to show whether dbuf is mapped or not + * @dbuf_duplicated: boolean to show whether dbuf is duplicated with a + * previous plane of the buffer. * @bytesused: number of bytes occupied by data in the plane (payload). * @length: size of this plane (NOT the payload) in bytes. The maximum * valid size is MAX_UINT - PAGE_SIZE. @@ -179,6 +181,7 @@ struct vb2_plane { void *mem_priv; struct dma_buf *dbuf; unsigned int dbuf_mapped; + bool dbuf_duplicated; unsigned int bytesused; unsigned int length; unsigned int min_length; -- cgit v1.2.3 From dfa5543193f303a7270ec7c725e656970faf7d57 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 17:28:49 +0300 Subject: drm/edid: make drm_edid_block_valid() static drm_edid_block_valid() is no longer used outside of drm_edid.c. Make it static. 
Acked-by: Zhi Wang Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812142849.1588006-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_edid.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 6bdfa254a1c1..eaac5e665892 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -440,8 +440,6 @@ int drm_add_modes_noedid(struct drm_connector *connector, int hdisplay, int vdisplay); int drm_edid_header_is_valid(const void *edid); -bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid, - bool *edid_corrupt); bool drm_edid_is_valid(struct edid *edid); void drm_edid_get_monitor_name(const struct edid *edid, char *name, int buflen); -- cgit v1.2.3 From 7945b741d1fc071a621366c512a060ea08848955 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Jul 2024 19:47:47 -0700 Subject: rcu-tasks: Remove RCU Tasks Rude asynchronous APIs The call_rcu_tasks_rude() and rcu_barrier_tasks_rude() APIs are currently unused. This commit therefore removes their definitions and boot-time self-tests. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..31e679c7110e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -37,7 +37,6 @@ /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); -void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); struct rcu_gp_oldstate; @@ -202,7 +201,6 @@ do { \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU -void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif -- cgit v1.2.3 From fe91cf39db0939ac8d523b5a1c31840f7cbe205c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 1 Aug 2024 17:34:25 -0700 Subject: rcu/tasks: Add detailed grace-period and barrier diagnostics This commit adds rcu_tasks_torture_stats_print(), rcu_tasks_trace_torture_stats_print(), and rcu_tasks_rude_torture_stats_print() functions that provide detailed diagnostics on grace-period, callback, and barrier state. Signed-off-by: "Paul E. 
McKenney" Signed-off-by: Neeraj Upadhyay --- include/linux/rcupdate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 31e679c7110e..17463e95b6ef 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -164,6 +164,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); +void rcu_tasks_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu @@ -190,6 +191,7 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); rcu_tasks_trace_qs_blkd(t); \ } \ } while (0) +void rcu_tasks_trace_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif @@ -202,6 +204,7 @@ do { \ # ifdef CONFIG_TASKS_RUDE_RCU void synchronize_rcu_tasks_rude(void); +void rcu_tasks_rude_torture_stats_print(char *tt, char *tf); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) -- cgit v1.2.3 From ac79beb913dc40b1a49b628e31afdfeb20194ab6 Mon Sep 17 00:00:00 2001 From: Paul Elder Date: Thu, 8 Aug 2024 22:41:05 +0200 Subject: media: rkisp1: Add support for the companding block Add support to the rkisp1 driver for the companding block that exists on the i.MX8MP version of the ISP. This requires usage of the new extensible parameters format, and showcases how the format allows for extensions without breaking backward compatibility. Signed-off-by: Paul Elder Reviewed-by: Jacopo Mondi Reviewed-by: Paul Elder Signed-off-by: Jacopo Mondi Tested-by: Kieran Bingham Acked-by: Sakari Ailus Signed-off-by: Laurent Pinchart --- include/uapi/linux/rkisp1-config.h | 89 +++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 9767bb19c72d..430daceafac7 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -164,6 +164,11 @@ #define RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS 17 #define RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS 6 +/* + * Compand + */ +#define RKISP1_CIF_ISP_COMPAND_NUM_POINTS 64 + /* * Measurement types */ @@ -851,6 +856,39 @@ struct rkisp1_params_cfg { struct rkisp1_cif_isp_isp_other_cfg others; }; +/** + * struct rkisp1_cif_isp_compand_bls_config - Rockchip ISP1 Companding parameters (BLS) + * @r: Fixed subtraction value for Bayer pattern R + * @gr: Fixed subtraction value for Bayer pattern Gr + * @gb: Fixed subtraction value for Bayer pattern Gb + * @b: Fixed subtraction value for Bayer pattern B + * + * The values will be subtracted from the sensor values. Note that unlike the + * dedicated BLS block, the BLS values in the compander are 20-bit unsigned. + */ +struct rkisp1_cif_isp_compand_bls_config { + __u32 r; + __u32 gr; + __u32 gb; + __u32 b; +}; + +/** + * struct rkisp1_cif_isp_compand_curve_config - Rockchip ISP1 Companding + * parameters (expand and compression curves) + * @px: Compand curve x-values. Each value stores the distance from the + * previous x-value, expressed as log2 of the distance on 5 bits. + * @x: Compand curve x-values. The functionality of these parameters are + * unknown due to do a lack of hardware documentation, but these are left + * here for future compatibility purposes. 
+ * @y: Compand curve y-values + */ +struct rkisp1_cif_isp_compand_curve_config { + __u8 px[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; + __u32 x[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; + __u32 y[RKISP1_CIF_ISP_COMPAND_NUM_POINTS]; +}; + /*---------- PART2: Measurement Statistics ------------*/ /** @@ -1018,6 +1056,9 @@ struct rkisp1_stat_buffer { * @RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS: Histogram statistics * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS: Auto exposure statistics * @RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS: Auto-focus statistics + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS: BLS in the compand block + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND: Companding expand curve + * @RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS: Companding compress curve */ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS, @@ -1037,6 +1078,9 @@ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS, RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS, RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND, + RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS, }; #define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) @@ -1380,6 +1424,46 @@ struct rkisp1_ext_params_afc_config { struct rkisp1_cif_isp_afc_config config; } __attribute__((aligned(8))); +/** + * struct rkisp1_ext_params_compand_bls_config - RkISP1 extensible params + * Compand BLS config + * + * RkISP1 extensible parameters Companding configuration block (black level + * subtraction). Identified by :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Companding BLS configuration, see + * :c:type:`rkisp1_cif_isp_compand_bls_config` + */ +struct rkisp1_ext_params_compand_bls_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_compand_bls_config config; +} __attribute__((aligned(8))); + +/** + * struct rkisp1_ext_params_compand_curve_config - RkISP1 extensible params + * Compand curve config + * + * RkISP1 extensible parameters Companding configuration block (expand and + * compression curves). Identified by + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND` or + * :c:type:`RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS`. + * + * @header: The RkISP1 extensible parameters header, see + * :c:type:`rkisp1_ext_params_block_header` + * @config: Companding curve configuration, see + * :c:type:`rkisp1_cif_isp_compand_curve_config` + */ +struct rkisp1_ext_params_compand_curve_config { + struct rkisp1_ext_params_block_header header; + struct rkisp1_cif_isp_compand_curve_config config; +} __attribute__((aligned(8))); + +/* + * The rkisp1_ext_params_compand_curve_config structure is counted twice as it + * is used for both the COMPAND_EXPAND and COMPAND_COMPRESS block types. 
+ */ #define RKISP1_EXT_PARAMS_MAX_SIZE \ (sizeof(struct rkisp1_ext_params_bls_config) +\ sizeof(struct rkisp1_ext_params_dpcc_config) +\ @@ -1397,7 +1481,10 @@ struct rkisp1_ext_params_afc_config { sizeof(struct rkisp1_ext_params_awb_meas_config) +\ sizeof(struct rkisp1_ext_params_hst_config) +\ sizeof(struct rkisp1_ext_params_aec_config) +\ - sizeof(struct rkisp1_ext_params_afc_config)) + sizeof(struct rkisp1_ext_params_afc_config) +\ + sizeof(struct rkisp1_ext_params_compand_bls_config) +\ + sizeof(struct rkisp1_ext_params_compand_curve_config) +\ + sizeof(struct rkisp1_ext_params_compand_curve_config)) /** * enum rksip1_ext_param_buffer_version - RkISP1 extensible parameters version -- cgit v1.2.3 From 66155de93bcf4f2967e602a4b3bf7ebe58f34b11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Aug 2024 12:02:58 -0700 Subject: KVM: x86: Disallow read-only memslots for SEV-ES and SEV-SNP (and TDX) Disallow read-only memslots for SEV-{ES,SNP} VM types, as KVM can't directly emulate instructions for ES/SNP, and instead the guest must explicitly request emulation. Unless the guest explicitly requests emulation without accessing memory, ES/SNP relies on KVM creating an MMIO SPTE, with the subsequent #NPF being reflected into the guest as a #VC. But for read-only memslots, KVM deliberately doesn't create MMIO SPTEs, because except for ES/SNP, doing so requires setting reserved bits in the SPTE, i.e. the SPTE can't be readable while also generating a #VC on writes. Because KVM never creates MMIO SPTEs and jumps directly to emulation, the guest never gets a #VC. And since KVM simply resumes the guest if ES/SNP guests trigger emulation, KVM effectively puts the vCPU into an infinite #NPF loop if the vCPU attempts to write read-only memory. Disallow read-only memory for all VMs with protected state, i.e. for upcoming TDX VMs as well as ES/SNP VMs. For TDX, it's actually possible to support read-only memory, as TDX uses EPT Violation #VE to reflect the fault into the guest, e.g. KVM could configure read-only SPTEs with RX protections and SUPPRESS_VE=0. But there is no strong use case for supporting read-only memslots on TDX, e.g. the main historical usage is to emulate option ROMs, but TDX disallows executing from shared memory. And if someone comes along with a legitimate, strong use case, the restriction can always be lifted for TDX. Don't bother trying to retroactively apply the restriction to SEV-ES VMs that are created as type KVM_X86_DEFAULT_VM. Read-only memslots can't possibly work for SEV-ES, i.e. disallowing such memslots is really just means reporting an error to userspace instead of silently hanging vCPUs. Trying to deal with the ordering between KVM_SEV_INIT and memslot creation isn't worth the marginal benefit it would provide userspace. 
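For context, the generic memslot code that consumes the new hook lives in virt/kvm/kvm_main.c and is therefore outside the 'include'-limited diff below. The following is only a simplified sketch of that consumer, written against the declaration added here; the exact function name and the handling of other flags (e.g. guest_memfd) are assumptions, not the literal hunk:

/*
 * Sketch (assumption): flag validation at memslot creation accepts
 * KVM_MEM_READONLY only when the architecture reports that read-only
 * memslots actually work for this VM. Other flag handling is omitted
 * for brevity.
 */
static int check_memory_region_flags(struct kvm *kvm,
				     const struct kvm_userspace_memory_region2 *mem)
{
	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;

	if (kvm_arch_has_readonly_mem(kvm))
		valid_flags |= KVM_MEM_READONLY;

	if (mem->flags & ~valid_flags)
		return -EINVAL;

	return 0;
}

An architecture such as x86 would then override kvm_arch_has_readonly_mem() to return false for VM types with protected guest state, while every other configuration keeps the CONFIG_HAVE_KVM_READONLY_MEM default added in the hunk below.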
Fixes: 26c44aa9e076 ("KVM: SEV: define VM types for SEV and SEV-ES") Fixes: 1dfe571c12cf ("KVM: SEV: Add initial SEV-SNP support") Cc: Peter Gonda Cc: Michael Roth Cc: Vishal Annapurve Cc: Ackerly Tng Signed-off-by: Sean Christopherson Message-ID: <20240809190319.1710470-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 79a6b1a63027..b23c6d48392f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; -- cgit v1.2.3 From 44f65d900698278a8451988abe0d5ca37fd46882 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Tue, 6 Aug 2024 21:49:27 +0000 Subject: binfmt_elf: mseal address zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In load_elf_binary as part of the execve(), when the current task’s personality has MMAP_PAGE_ZERO set, the kernel allocates one page at address 0. According to the comment: /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ At one point, Linus suggested removing this [1]. Code search in debian didn't see much use of MMAP_PAGE_ZERO [2], it exists in util and test (rr). Sealing this is probably safe, the comment doesn't say the app ever wanting to change the mapping to rwx. Sealing also ensures that never happens. If there is a complaint, we can make this configurable. Link: https://lore.kernel.org/lkml/CAHk-=whVa=nm_GW=NVfPHqcxDbWt4JjjK1YWb0cLjO4ZSGyiDA@mail.gmail.com/ [1] Link: https://codesearch.debian.net/search?q=MMAP_PAGE_ZERO&literal=1&perpkg=1&page=1 [2] Signed-off-by: Jeff Xu Link: https://lore.kernel.org/r/20240806214931.2198172-2-jeffxu@google.com Signed-off-by: Kees Cook --- include/linux/mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..a178c15812eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4201,4 +4201,14 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +#ifdef CONFIG_64BIT +int do_mseal(unsigned long start, size_t len_in, unsigned long flags); +#else +static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags) +{ + /* noop on 32 bit */ + return 0; +} +#endif + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 1c0e5881691a787a9399a99bff4d56ead6e75e91 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 14 Aug 2024 10:31:13 +0200 Subject: KVM: SEV: uapi: fix typo in SEV_RET_INVALID_CONFIG "INVALID" is misspelt in "SEV_RET_INAVLID_CONFIG". Since this is part of the UAPI, keep the current definition and add a new one with the fix. 
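Since the corrected name is defined as an alias of the misspelled enumerator, both spellings compile to the same value and existing users are unaffected. A minimal, hypothetical userspace check of that property against the installed UAPI header would be:

#include <linux/psp-sev.h>

/* The corrected spelling aliases the original enumerator, so old code
 * keeps building while new code can use SEV_RET_INVALID_CONFIG. */
_Static_assert(SEV_RET_INVALID_CONFIG == SEV_RET_INAVLID_CONFIG,
	       "corrected spelling must alias the original UAPI value");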
Fix-suggested-by: Marc Zyngier Signed-off-by: Amit Shah Message-ID: <20240814083113.21622-1-amit@kernel.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59..832c15d9155b 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, -- cgit v1.2.3 From 71833e79a42178d8a50b5081c98c78ace9325628 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 14 Aug 2024 13:16:49 +0100 Subject: i2c: Use IS_REACHABLE() for substituting empty ACPI functions Replace IS_ENABLED() with IS_REACHABLE() to substitute empty stubs for: i2c_acpi_get_i2c_resource() i2c_acpi_client_count() i2c_acpi_find_bus_speed() i2c_acpi_new_device_by_fwnode() i2c_adapter *i2c_acpi_find_adapter_by_handle() i2c_acpi_waive_d0_probe() commit f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") partially fixed this conditional to depend on CONFIG_I2C, but used IS_ENABLED(), which is wrong since CONFIG_I2C is tristate. CONFIG_ACPI is boolean but let's also change it to use IS_REACHABLE() to future-proof it against becoming tristate. Somehow despite testing various combinations of CONFIG_I2C and CONFIG_ACPI we missed the combination CONFIG_I2C=m, CONFIG_ACPI=y. Signed-off-by: Richard Fitzgerald Fixes: f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408141333.gYnaitcV-lkp@intel.com/ Reviewed-by: Takashi Iwai Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); -- cgit v1.2.3 From ac9d45544cd571decca395715d0b0a3b617d02f4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 1 Jul 2024 13:33:58 -0700 Subject: locking/csd_lock: Provide an indication of ongoing CSD-lock stall If a CSD-lock stall goes on long enough, it will cause an RCU CPU stall warning. This additional warning provides much additional console-log traffic and little additional information. Therefore, provide a new csd_lock_is_stuck() function that returns true if there is an ongoing CSD-lock stall. This function will be used by the RCU CPU stall warnings to provide a one-line indication of the stall when this function returns true. [ neeraj.upadhyay: Apply Rik van Riel feedback. ] [ neeraj.upadhyay: Apply kernel test robot feedback. ] Signed-off-by: Paul E. 
McKenney Cc: Imran Khan Cc: Ingo Molnar Cc: Leonardo Bras Cc: "Peter Zijlstra (Intel)" Cc: Rik van Riel Signed-off-by: Neeraj Upadhyay --- include/linux/smp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index fcd61dfe2af3..3871bd32018f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -294,4 +294,10 @@ int smpcfd_prepare_cpu(unsigned int cpu); int smpcfd_dead_cpu(unsigned int cpu); int smpcfd_dying_cpu(unsigned int cpu); +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG +bool csd_lock_is_stuck(void); +#else +static inline bool csd_lock_is_stuck(void) { return false; } +#endif + #endif /* __LINUX_SMP_H */ -- cgit v1.2.3 From 5115bcaf68d87e7a25fd238332b402a24bc534d0 Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:46 +0530 Subject: dt-bindings: clock: qcom: add DISPCC clocks on SM4450 Add device tree bindings for the display clock controller on Qualcomm SM4450 platform. Signed-off-by: Ajit Pandey Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240611133752.2192401-3-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-dispcc.h | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-dispcc.h b/include/dt-bindings/clock/qcom,sm4450-dispcc.h new file mode 100644 index 000000000000..ca6f2ef90157 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-dispcc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_SM4450_H + +/* DISP_CC clocks */ +#define DISP_CC_MDSS_AHB1_CLK 0 +#define DISP_CC_MDSS_AHB_CLK 1 +#define DISP_CC_MDSS_AHB_CLK_SRC 2 +#define DISP_CC_MDSS_BYTE0_CLK 3 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define DISP_CC_MDSS_ESC0_CLK 7 +#define DISP_CC_MDSS_ESC0_CLK_SRC 8 +#define DISP_CC_MDSS_MDP1_CLK 9 +#define DISP_CC_MDSS_MDP_CLK 10 +#define DISP_CC_MDSS_MDP_CLK_SRC 11 +#define DISP_CC_MDSS_MDP_LUT1_CLK 12 +#define DISP_CC_MDSS_MDP_LUT_CLK 13 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 14 +#define DISP_CC_MDSS_PCLK0_CLK 15 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 16 +#define DISP_CC_MDSS_ROT1_CLK 17 +#define DISP_CC_MDSS_ROT_CLK 18 +#define DISP_CC_MDSS_ROT_CLK_SRC 19 +#define DISP_CC_MDSS_RSCC_AHB_CLK 20 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 21 +#define DISP_CC_MDSS_VSYNC1_CLK 22 +#define DISP_CC_MDSS_VSYNC_CLK 23 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 24 +#define DISP_CC_PLL0 25 +#define DISP_CC_PLL1 26 +#define DISP_CC_SLEEP_CLK 27 +#define DISP_CC_SLEEP_CLK_SRC 28 +#define DISP_CC_XO_CLK 29 +#define DISP_CC_XO_CLK_SRC 30 + +/* DISP_CC power domains */ +#define DISP_CC_MDSS_CORE_GDSC 0 +#define DISP_CC_MDSS_CORE_INT2_GDSC 1 + +/* DISP_CC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_CORE_INT2_BCR 1 +#define DISP_CC_MDSS_RSCC_BCR 2 + +#endif -- cgit v1.2.3 From 9bf45e4f317f75db234ef6af1f316cf4676e8863 Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:48 +0530 Subject: dt-bindings: clock: qcom: add CAMCC clocks on SM4450 Add device tree bindings for the camera clock controller on Qualcomm SM4450 platform. 
Signed-off-by: Ajit Pandey Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240611133752.2192401-5-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-camcc.h | 106 ++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-camcc.h b/include/dt-bindings/clock/qcom,sm4450-camcc.h new file mode 100644 index 000000000000..bf077951bf1c --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-camcc.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM4450_H + +/* CAM_CC clocks */ +#define CAM_CC_BPS_AHB_CLK 0 +#define CAM_CC_BPS_AREG_CLK 1 +#define CAM_CC_BPS_CLK 2 +#define CAM_CC_BPS_CLK_SRC 3 +#define CAM_CC_CAMNOC_ATB_CLK 4 +#define CAM_CC_CAMNOC_AXI_CLK 5 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 6 +#define CAM_CC_CAMNOC_AXI_HF_CLK 7 +#define CAM_CC_CAMNOC_AXI_SF_CLK 8 +#define CAM_CC_CCI_0_CLK 9 +#define CAM_CC_CCI_0_CLK_SRC 10 +#define CAM_CC_CCI_1_CLK 11 +#define CAM_CC_CCI_1_CLK_SRC 12 +#define CAM_CC_CORE_AHB_CLK 13 +#define CAM_CC_CPAS_AHB_CLK 14 +#define CAM_CC_CPHY_RX_CLK_SRC 15 +#define CAM_CC_CRE_AHB_CLK 16 +#define CAM_CC_CRE_CLK 17 +#define CAM_CC_CRE_CLK_SRC 18 +#define CAM_CC_CSI0PHYTIMER_CLK 19 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 20 +#define CAM_CC_CSI1PHYTIMER_CLK 21 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 22 +#define CAM_CC_CSI2PHYTIMER_CLK 23 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 24 +#define CAM_CC_CSIPHY0_CLK 25 +#define CAM_CC_CSIPHY1_CLK 26 +#define CAM_CC_CSIPHY2_CLK 27 +#define CAM_CC_FAST_AHB_CLK_SRC 28 +#define CAM_CC_ICP_ATB_CLK 29 +#define CAM_CC_ICP_CLK 30 +#define CAM_CC_ICP_CLK_SRC 31 +#define CAM_CC_ICP_CTI_CLK 32 +#define CAM_CC_ICP_TS_CLK 33 +#define CAM_CC_MCLK0_CLK 34 +#define CAM_CC_MCLK0_CLK_SRC 35 +#define CAM_CC_MCLK1_CLK 36 +#define CAM_CC_MCLK1_CLK_SRC 37 +#define CAM_CC_MCLK2_CLK 38 +#define CAM_CC_MCLK2_CLK_SRC 39 +#define CAM_CC_MCLK3_CLK 40 +#define CAM_CC_MCLK3_CLK_SRC 41 +#define CAM_CC_OPE_0_AHB_CLK 42 +#define CAM_CC_OPE_0_AREG_CLK 43 +#define CAM_CC_OPE_0_CLK 44 +#define CAM_CC_OPE_0_CLK_SRC 45 +#define CAM_CC_PLL0 46 +#define CAM_CC_PLL0_OUT_EVEN 47 +#define CAM_CC_PLL0_OUT_ODD 48 +#define CAM_CC_PLL1 49 +#define CAM_CC_PLL1_OUT_EVEN 50 +#define CAM_CC_PLL2 51 +#define CAM_CC_PLL2_OUT_EVEN 52 +#define CAM_CC_PLL3 53 +#define CAM_CC_PLL3_OUT_EVEN 54 +#define CAM_CC_PLL4 55 +#define CAM_CC_PLL4_OUT_EVEN 56 +#define CAM_CC_SLOW_AHB_CLK_SRC 57 +#define CAM_CC_SOC_AHB_CLK 58 +#define CAM_CC_SYS_TMR_CLK 59 +#define CAM_CC_TFE_0_AHB_CLK 60 +#define CAM_CC_TFE_0_CLK 61 +#define CAM_CC_TFE_0_CLK_SRC 62 +#define CAM_CC_TFE_0_CPHY_RX_CLK 63 +#define CAM_CC_TFE_0_CSID_CLK 64 +#define CAM_CC_TFE_0_CSID_CLK_SRC 65 +#define CAM_CC_TFE_1_AHB_CLK 66 +#define CAM_CC_TFE_1_CLK 67 +#define CAM_CC_TFE_1_CLK_SRC 68 +#define CAM_CC_TFE_1_CPHY_RX_CLK 69 +#define CAM_CC_TFE_1_CSID_CLK 70 +#define CAM_CC_TFE_1_CSID_CLK_SRC 71 + +/* CAM_CC power domains */ +#define CAM_CC_CAMSS_TOP_GDSC 0 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_CAMNOC_BCR 1 +#define CAM_CC_CAMSS_TOP_BCR 2 +#define CAM_CC_CCI_0_BCR 3 +#define CAM_CC_CCI_1_BCR 4 +#define CAM_CC_CPAS_BCR 5 +#define CAM_CC_CRE_BCR 6 +#define CAM_CC_CSI0PHY_BCR 7 +#define CAM_CC_CSI1PHY_BCR 8 +#define 
CAM_CC_CSI2PHY_BCR 9 +#define CAM_CC_ICP_BCR 10 +#define CAM_CC_MCLK0_BCR 11 +#define CAM_CC_MCLK1_BCR 12 +#define CAM_CC_MCLK2_BCR 13 +#define CAM_CC_MCLK3_BCR 14 +#define CAM_CC_OPE_0_BCR 15 +#define CAM_CC_TFE_0_BCR 16 +#define CAM_CC_TFE_1_BCR 17 + +#endif -- cgit v1.2.3 From 47bad234eed970d0d1b03204aab1b3644709ee0b Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Tue, 11 Jun 2024 19:07:50 +0530 Subject: dt-bindings: clock: qcom: add GPUCC clocks on SM4450 Add device tree bindings for the graphics clock controller on Qualcomm SM4450 platform. Signed-off-by: Ajit Pandey Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240611133752.2192401-7-quic_ajipan@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm4450-gpucc.h | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm4450-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm4450-gpucc.h b/include/dt-bindings/clock/qcom,sm4450-gpucc.h new file mode 100644 index 000000000000..304f83e5f645 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm4450-gpucc.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM4450_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM4450_H + +/* GPU_CC clocks */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CB_CLK 1 +#define GPU_CC_CRC_AHB_CLK 2 +#define GPU_CC_CX_FF_CLK 3 +#define GPU_CC_CX_GFX3D_CLK 4 +#define GPU_CC_CX_GFX3D_SLV_CLK 5 +#define GPU_CC_CX_GMU_CLK 6 +#define GPU_CC_CX_SNOC_DVM_CLK 7 +#define GPU_CC_CXO_AON_CLK 8 +#define GPU_CC_CXO_CLK 9 +#define GPU_CC_DEMET_CLK 10 +#define GPU_CC_DEMET_DIV_CLK_SRC 11 +#define GPU_CC_FF_CLK_SRC 12 +#define GPU_CC_FREQ_MEASURE_CLK 13 +#define GPU_CC_GMU_CLK_SRC 14 +#define GPU_CC_GX_CXO_CLK 15 +#define GPU_CC_GX_FF_CLK 16 +#define GPU_CC_GX_GFX3D_CLK 17 +#define GPU_CC_GX_GFX3D_CLK_SRC 18 +#define GPU_CC_GX_GFX3D_RDVM_CLK 19 +#define GPU_CC_GX_GMU_CLK 20 +#define GPU_CC_GX_VSENSE_CLK 21 +#define GPU_CC_HUB_AHB_DIV_CLK_SRC 22 +#define GPU_CC_HUB_AON_CLK 23 +#define GPU_CC_HUB_CLK_SRC 24 +#define GPU_CC_HUB_CX_INT_CLK 25 +#define GPU_CC_HUB_CX_INT_DIV_CLK_SRC 26 +#define GPU_CC_MEMNOC_GFX_CLK 27 +#define GPU_CC_MND1X_0_GFX3D_CLK 28 +#define GPU_CC_PLL0 29 +#define GPU_CC_PLL1 30 +#define GPU_CC_SLEEP_CLK 31 +#define GPU_CC_XO_CLK_SRC 32 +#define GPU_CC_XO_DIV_CLK_SRC 33 + +/* GPU_CC power domains */ +#define GPU_CC_CX_GDSC 0 +#define GPU_CC_GX_GDSC 1 + +/* GPU_CC resets */ +#define GPU_CC_ACD_BCR 0 +#define GPU_CC_CB_BCR 1 +#define GPU_CC_CX_BCR 2 +#define GPU_CC_FAST_HUB_BCR 3 +#define GPU_CC_FF_BCR 4 +#define GPU_CC_GFX3D_AON_BCR 5 +#define GPU_CC_GMU_BCR 6 +#define GPU_CC_GX_BCR 7 +#define GPU_CC_XO_BCR 8 +#define GPU_CC_GX_ACD_IROOT_BCR 9 +#define GPU_CC_RBCPR_BCR 10 + +#endif -- cgit v1.2.3 From 89817522b1a58970ac1247f65af009b046344fb0 Mon Sep 17 00:00:00 2001 From: Tengfei Fan Date: Mon, 5 Aug 2024 19:08:03 +0200 Subject: dt-bindings: mailbox: qcom-ipcc: Add GPDSP0 and GPDSP1 clients Add GPDSP0 and GPDSP1 clients for SA8775p platform. 
Signed-off-by: Tengfei Fan Acked-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240805-topic-sa8775p-iot-remoteproc-v4-2-86affdc72c04@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/mailbox/qcom-ipcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h index fbfa3febc66d..fd85a79381b3 100644 --- a/include/dt-bindings/mailbox/qcom-ipcc.h +++ b/include/dt-bindings/mailbox/qcom-ipcc.h @@ -33,5 +33,7 @@ #define IPCC_CLIENT_NSP1 18 #define IPCC_CLIENT_TME 23 #define IPCC_CLIENT_WPSS 24 +#define IPCC_CLIENT_GPDSP0 31 +#define IPCC_CLIENT_GPDSP1 32 #endif -- cgit v1.2.3 From 924fc22c282edbf93869b150d9e1b47e0b10485e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 09:44:57 +0200 Subject: firmware: qcom: qseecom: remove unused functions qseecom_scm_dev(), qseecom_dma_alloc() and qseecom_dma_free() are no longer used following the conversion to using tzmem. Remove them. Fixes: 6612103ec35a ("firmware: qcom: qseecom: convert to using the TZ allocator") Reviewed-by: Andrew Halaney Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240731-tzmem-efivars-fix-v2-2-f0e84071ec07@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_qseecom.h | 45 ------------------------------ 1 file changed, 45 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 1dc5b3b50aa9..3387897bf368 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -25,51 +25,6 @@ struct qseecom_client { u32 app_id; }; -/** - * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. - * @client: The QSEECOM client device. - * - * Returns the SCM device under which the provided QSEECOM client device - * operates. This function is intended to be used for DMA allocations. - */ -static inline struct device *qseecom_scm_dev(struct qseecom_client *client) -{ - return client->aux_dev.dev.parent->parent; -} - -/** - * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. - * @client: The QSEECOM client to allocate the memory for. - * @size: The number of bytes to allocate. - * @dma_handle: Pointer to where the DMA address should be stored. - * @gfp: Allocation flags. - * - * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for - * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. - */ -static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); -} - -/** - * dma_free_coherent() - Free QSEECOM DMA memory. - * @client: The QSEECOM client for which the memory has been allocated. - * @size: The number of bytes allocated. - * @cpu_addr: Virtual memory address to free. - * @dma_handle: DMA memory address to free. - * - * Wrapper function for dma_free_coherent(), freeing memory previously - * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for - * details. - */ -static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); -} - /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. 
* @client: The QSEECOM client associated with the target app. -- cgit v1.2.3 From 6b34e75c48bb913f3431e66353cad9782e7225c7 Mon Sep 17 00:00:00 2001 From: Jingyi Wang Date: Wed, 14 Aug 2024 15:28:05 +0800 Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS8275/QCS8300 Add the ID for Qualcomm QCS8275/QCS8300 SoC. Signed-off-by: Jingyi Wang Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240814072806.4107079-3-quic_jingyw@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index d6c9e9472121..a213f6244c5d 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -274,6 +274,8 @@ #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 #define QCOM_ID_IPQ5321 650 +#define QCOM_ID_QCS8300 674 +#define QCOM_ID_QCS8275 675 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 216203bdc2280d8fc5baf60707eee2051de1426e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Aug 2024 16:15:02 -0600 Subject: UAPI: net/sched: Use __struct_group() in flex struct tc_u32_sel Use the `__struct_group()` helper to create a new tagged `struct tc_u32_sel_hdr`. This structure groups together all the members of the flexible `struct tc_u32_sel` except the flexible array. As a result, the array is effectively separated from the rest of the members without modifying the memory layout of the flexible structure. This new tagged struct will be used to fix problematic declarations of middle-flex-arrays in composite structs[1]. [1] https://git.kernel.org/linus/d88cabfd9abc Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/e59fe833564ddc5b2cc83056a4c504be887d6193.1723586870.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index d36d9cdf0c00..2c32080416b5 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -246,16 +246,19 @@ struct tc_u32_key { }; struct tc_u32_sel { - unsigned char flags; - unsigned char offshift; - unsigned char nkeys; - - __be16 offmask; - __u16 off; - short offoff; - - short hoff; - __be32 hmask; + /* New members MUST be added within the __struct_group() macro below. */ + __struct_group(tc_u32_sel_hdr, hdr, /* no attrs */, + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + ); struct tc_u32_key keys[]; }; -- cgit v1.2.3 From 69139d2919dd4aa9a553c8245e7c63e82613e3fc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 11 Aug 2024 19:21:53 -0700 Subject: vsock: fix recursive ->recvmsg calls After a vsock socket has been added to a BPF sockmap, its prot->recvmsg has been replaced with vsock_bpf_recvmsg(). Thus the following recursiion could happen: vsock_bpf_recvmsg() -> __vsock_recvmsg() -> vsock_connectible_recvmsg() -> prot->recvmsg() -> vsock_bpf_recvmsg() again We need to fix it by calling the original ->recvmsg() without any BPF sockmap logic in __vsock_recvmsg(). 
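The header hunk below only exports the new double-underscore variants; the matching net/vmw_vsock changes are outside the 'include'-limited diff, so the following is just a sketch of the intended call pattern (the socket-type check and error handling are assumptions):

/*
 * Sketch (assumption about the vsock_bpf.c side): the sockmap fallback
 * calls the double-underscore helpers directly, so it can never re-enter
 * prot->recvmsg(), i.e. vsock_bpf_recvmsg() itself.
 */
static int __vsock_recvmsg(struct socket *sock, struct msghdr *msg,
			   size_t len, int flags)
{
	if (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET)
		return __vsock_connectible_recvmsg(sock, msg, len, flags);

	return __vsock_dgram_recvmsg(sock, msg, len, flags);
}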
Fixes: 634f1a7110b4 ("vsock: support sockmap") Reported-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Tested-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com Cc: Bobby Eshleman Cc: Michael S. Tsirkin Cc: Stefano Garzarella Signed-off-by: Cong Wang Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20240812022153.86512-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5..24d970f7a4fa 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -- cgit v1.2.3 From 9bb5e74b2bf88fbb024bb15ded3b011e02c673be Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 11:49:20 +0200 Subject: Revert "misc: fastrpc: Restrict untrusted app to attach to privileged PD" This reverts commit bab2f5e8fd5d2f759db26b78d9db57412888f187. Joel reported that this commit breaks userspace and stops sensors in SDM845 from working. Also breaks other qcom SoC devices running postmarketOS. Cc: stable Cc: Ekansh Gupta Cc: Dmitry Baryshkov Reported-by: Joel Selvaraj Link: https://lore.kernel.org/r/9a9f5646-a554-4b65-8122-d212bb665c81@umsystem.edu Signed-off-by: Griffin Kroah-Hartman Acked-by: Srinivas Kandagatla Fixes: bab2f5e8fd5d ("misc: fastrpc: Restrict untrusted app to attach to privileged PD") Link: https://lore.kernel.org/r/20240815094920.8242-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) -- cgit v1.2.3 From aaf8c0b9ae042494cb4585883b15c1332de77840 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jun 2024 09:47:27 +0800 Subject: f2fs: reduce expensive checkpoint trigger frequency We may trigger high frequent 
checkpoint for below case: 1. mkdir /mnt/dir1; set dir1 encrypted 2. touch /mnt/file1; fsync /mnt/file1 3. mkdir /mnt/dir2; set dir2 encrypted 4. touch /mnt/file2; fsync /mnt/file2 ... Although, newly created dir and file are not related, due to commit bbf156f7afa7 ("f2fs: fix lost xattrs of directories"), we will trigger checkpoint whenever fsync() comes after a new encrypted dir created. In order to avoid such performance regression issue, let's record an entry including directory's ino in global cache whenever we update directory's xattr data, and then triggerring checkpoint() only if xattr metadata of target file's parent was updated. This patch updates to cover below no encryption case as well: 1) parent is checkpointed 2) set_xattr(dir) w/ new xnid 3) create(file) 4) fsync(file) Fixes: bbf156f7afa7 ("f2fs: fix lost xattrs of directories") Reported-by: wangzijie Reported-by: Zhiguo Niu Tested-by: Zhiguo Niu Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed794b5fefbe..2851c823095b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ -- cgit v1.2.3 From fda70207135b60dd1a7c8f16cc036bb1cc344490 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 11:01:23 +0200 Subject: context_tracking, rcu: Rename rcu_dynticks_curr_cpu_in_eqs() into rcu_is_watching_curr_cpu() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. Note that "watching" is the opposite of "in EQS", so the negation is lifted out of the helper and into the callsites. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index a6c36780cc3b..d53092ffa9db 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -113,13 +113,17 @@ extern void ct_idle_enter(void); extern void ct_idle_exit(void); /* - * Is the current CPU in an extended quiescent state? + * Is RCU watching the current CPU (IOW, it is not in an extended quiescent state)? + * + * Note that this returns the actual boolean data (watching / not watching), + * whereas ct_rcu_watching() returns the RCU_WATCHING subvariable of + * context_tracking.state. * * No ordering, as we are sampling CPU-local information. */ -static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) +static __always_inline bool rcu_is_watching_curr_cpu(void) { - return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING); + return raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING; } /* @@ -140,7 +144,7 @@ static __always_inline bool warn_rcu_enter(void) * lots of the actual reporting also relies on RCU. 
*/ preempt_disable_notrace(); - if (rcu_dynticks_curr_cpu_in_eqs()) { + if (!rcu_is_watching_curr_cpu()) { ret = true; ct_state_inc(CT_RCU_WATCHING); } -- cgit v1.2.3 From 32a9f26e5e266f0116cc8536e5e4544523ddd334 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 29 Apr 2024 16:43:36 +0200 Subject: rcu: Rename rcu_momentary_dyntick_idle() into rcu_momentary_eqs() The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, replace "dyntick_idle" into "eqs" to drop the dyntick reference. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d9ac7b136aea..cf2b5a188f78 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -158,7 +158,7 @@ void rcu_scheduler_starting(void); static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } -static inline void rcu_momentary_dyntick_idle(void) { } +static inline void rcu_momentary_eqs(void) { } static inline void kfree_rcu_scheduler_running(void) { } static inline bool rcu_gp_might_be_stalled(void) { return false; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 254244202ea9..7dbde2b6f714 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -void rcu_momentary_dyntick_idle(void); +void rcu_momentary_eqs(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); -- cgit v1.2.3 From 4f336dc07eceb77d2164bc1121a5ae6003b19f55 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 22 Apr 2024 13:57:29 +0200 Subject: context_tracking, rcu: Rename rcu_dyntick trace event into rcu_watching The "rcu_dyntick" naming convention has been turned into "rcu_watching" for all helpers now, align the trace event to that. To add to the confusion, the strings passed to the trace event are now reversed: when RCU "starts" the dyntick / EQS state, it "stops" watching. Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/trace/events/rcu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 4066b6d51e46..e81431deaa50 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -466,40 +466,40 @@ TRACE_EVENT(rcu_stall_warning, /* * Tracepoint for dyntick-idle entry/exit events. These take 2 strings * as argument: - * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not - * being in dyntick-idle mode. + * polarity: "Start", "End", "StillWatching" for entering, exiting or still not + * being in EQS mode. * context: "USER" or "IDLE" or "IRQ". * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. * * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is - * the ->dynticks counter. Note that task-related and interrupt-related + * the RCU_WATCHING counter. 
Note that task-related and interrupt-related * events use two separate counters, and that the "++=" and "--=" events * for irq/NMI will change the counter by two, otherwise by one. */ -TRACE_EVENT_RCU(rcu_dyntick, +TRACE_EVENT_RCU(rcu_watching, - TP_PROTO(const char *polarity, long oldnesting, long newnesting, int dynticks), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, int counter), - TP_ARGS(polarity, oldnesting, newnesting, dynticks), + TP_ARGS(polarity, oldnesting, newnesting, counter), TP_STRUCT__entry( __field(const char *, polarity) __field(long, oldnesting) __field(long, newnesting) - __field(int, dynticks) + __field(int, counter) ), TP_fast_assign( __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; - __entry->dynticks = dynticks; + __entry->counter = counter; ), TP_printk("%s %lx %lx %#3x", __entry->polarity, __entry->oldnesting, __entry->newnesting, - __entry->dynticks & 0xfff) + __entry->counter & 0xfff) ); /* -- cgit v1.2.3 From 4040b1139904b3f72b56862fc9a8d3e9abb69ffb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 17 Jun 2024 08:51:14 -0700 Subject: context_tracking: Tag context_tracking_enabled_this_cpu() __always_inline Force context_tracking_enabled_this_cpu() to be inlined so that invoking it from guest_context_enter_irqoff(), which KVM uses in non-instrumentable code, doesn't unexpectedly leave a noinstr section. vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1c7: call to context_tracking_enabled_this_cpu() leaves .noinstr.text section vmlinux.o: warning: objtool: svm_vcpu_enter_exit+0x83: call to context_tracking_enabled_this_cpu() leaves .noinstr.text section Note, the CONFIG_CONTEXT_TRACKING_USER=n stub is already __always_inline. Signed-off-by: Sean Christopherson Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 0dbda59c9f37..7b8433d5a8ef 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -113,7 +113,7 @@ static __always_inline bool context_tracking_enabled_cpu(int cpu) return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } -static inline bool context_tracking_enabled_this_cpu(void) +static __always_inline bool context_tracking_enabled_this_cpu(void) { return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } -- cgit v1.2.3 From a98ae7f045b29de4f48b191d5aeb4e803183d759 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Jul 2024 12:18:40 +0200 Subject: lib/string_choices: Add str_up_down() helper Add str_up_down() helper to return "up" or "down" string literal. Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20240725101841.574-1-michal.wajdeczko@intel.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..bcde3c9cff81 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,6 +42,11 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +static inline const char *str_up_down(bool v) +{ + return v ? 
"up" : "down"; +} + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From f5c1ca3a15fdb867d2b535003f74e0b975eff116 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 12 Aug 2024 11:29:40 -0700 Subject: string_choices: Add wrapper for str_down_up() The string choice functions which are not clearly true/false synonyms also have inverted wrappers. Add this for str_down_up() as well. Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240812182939.work.424-kees@kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Kees Cook --- include/linux/string_choices.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index bcde3c9cff81..1320bcdcb89c 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -46,6 +46,7 @@ static inline const char *str_up_down(bool v) { return v ? "up" : "down"; } +#define str_down_up(v) str_up_down(!(v)) /** * str_plural - Return the simple pluralization based on English counts -- cgit v1.2.3 From aae6b81260fd9a7224f7eb4fc440d625852245bb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 10:43:48 -0400 Subject: Bluetooth: HCI: Invert LE State quirk to be opt-out rather then opt-in This inverts the LE State quirk so by default we assume the controllers would report valid states rather than invalid which is how quirks normally behave, also this would result in HCI command failing it the LE States are really broken thus exposing the controllers that are really broken in this respect. Link: https://github.com/bluez/bluez/issues/584 Fixes: 220915857e29 ("Bluetooth: Adding driver and quirk defs for multi-role LE") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 17 ++++++++++------- include/net/bluetooth/hci_core.h | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f..d1d073089f38 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. 
This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68..e449dba698f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ -- cgit v1.2.3 From 648b4bde0aca2980ebc0b90cdfbb80d222370c3d Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Mon, 12 Aug 2024 10:43:02 +0530 Subject: dt-bindings: clock: qcom: Add GPLL9 support on gcc-sc8180x Add the missing GPLL9 which is required for the gcc sdcc2 clock. Fixes: 0fadcdfdcf57 ("dt-bindings: clock: Add SC8180x GCC binding") Cc: stable@vger.kernel.org Acked-by: Krzysztof Kozlowski Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20240812-gcc-sc8180x-fixes-v2-2-8b3eaa5fb856@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sc8180x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sc8180x.h b/include/dt-bindings/clock/qcom,gcc-sc8180x.h index 487b12c19db5..e364006aa6ea 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc8180x.h +++ b/include/dt-bindings/clock/qcom,gcc-sc8180x.h @@ -248,6 +248,7 @@ #define GCC_USB3_SEC_CLKREF_CLK 238 #define GCC_UFS_MEM_CLKREF_EN 239 #define GCC_UFS_CARD_CLKREF_EN 240 +#define GPLL9 241 #define GCC_EMAC_BCR 0 #define GCC_GPU_BCR 1 -- cgit v1.2.3 From c580e7bfc0cd140b8d3cf73183e08ca8b23326db Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Thu, 8 Aug 2024 21:40:15 +0300 Subject: dt-bindings: arm: qcom,ids: Add IDs for SM7325 family Add Qualcomm SM7325/SM7325P (yupik) SoC IDs. Signed-off-by: Danila Tikhonov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240808184048.63030-2-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index a213f6244c5d..8332f8d82f96 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -234,11 +234,13 @@ #define QCOM_ID_SA8540P 461 #define QCOM_ID_QCM4290 469 #define QCOM_ID_QCS4290 470 +#define QCOM_ID_SM7325 475 #define QCOM_ID_SM8450_2 480 #define QCOM_ID_SM8450_3 482 #define QCOM_ID_SC7280 487 #define QCOM_ID_SC7180P 495 #define QCOM_ID_QCM6490 497 +#define QCOM_ID_SM7325P 499 #define QCOM_ID_IPQ5000 503 #define QCOM_ID_IPQ0509 504 #define QCOM_ID_IPQ0518 505 -- cgit v1.2.3 From 015dff12dfdeb8d94115ec829bc2e4b711075935 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 14 Aug 2024 18:20:22 +0200 Subject: dt-bindings: clock: gcc-msm8998: Add Q6 and LPASS clocks definitions Add definitions for the Q6 BIMC, LPASS core and adsp smmu clocks, required to enable audio functionality on MSM8998. Add the GDSC definitions for the LPASS_ADSP_GDSC and LPASS_CORE_GDSC as a final step to enable the required clock tree for the lpass iommu and for the audio dsp itself. 
Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Marc Gonzalez Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240814-lpass-v1-1-a5bb8f9dfa8b@freebox.fr Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-msm8998.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-msm8998.h b/include/dt-bindings/clock/qcom,gcc-msm8998.h index b5456a64d421..5b0dde080900 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8998.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8998.h @@ -193,10 +193,15 @@ #define GCC_MMSS_GPLL0_DIV_CLK 184 #define GCC_GPU_GPLL0_DIV_CLK 185 #define GCC_GPU_GPLL0_CLK 186 +#define HLOS1_VOTE_LPASS_CORE_SMMU_CLK 187 +#define HLOS1_VOTE_LPASS_ADSP_SMMU_CLK 188 +#define GCC_MSS_Q6_BIMC_AXI_CLK 189 #define PCIE_0_GDSC 0 #define UFS_GDSC 1 #define USB_30_GDSC 2 +#define LPASS_ADSP_GDSC 3 +#define LPASS_CORE_GDSC 4 #define GCC_BLSP1_QUP1_BCR 0 #define GCC_BLSP1_QUP2_BCR 1 -- cgit v1.2.3 From a500427c84d1c7c59ebef6a70895fdf28ddb0884 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Jul 2024 11:18:13 +0530 Subject: dt-bindings: interconnect: Add Qualcomm IPQ5332 support Add interconnect-cells to clock provider so that it can be used as icc provider. Add master/slave ids for Qualcomm IPQ5332 Network-On-Chip interfaces. This will be used by the gcc-ipq5332 driver for providing interconnect services using the icc-clk framework. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240730054817.1915652-2-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/interconnect/qcom,ipq5332.h | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,ipq5332.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,ipq5332.h b/include/dt-bindings/interconnect/qcom,ipq5332.h new file mode 100644 index 000000000000..16475bb07a48 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,ipq5332.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +#ifndef INTERCONNECT_QCOM_IPQ5332_H +#define INTERCONNECT_QCOM_IPQ5332_H + +#define MASTER_SNOC_PCIE3_1_M 0 +#define SLAVE_SNOC_PCIE3_1_M 1 +#define MASTER_ANOC_PCIE3_1_S 2 +#define SLAVE_ANOC_PCIE3_1_S 3 +#define MASTER_SNOC_PCIE3_2_M 4 +#define SLAVE_SNOC_PCIE3_2_M 5 +#define MASTER_ANOC_PCIE3_2_S 6 +#define SLAVE_ANOC_PCIE3_2_S 7 +#define MASTER_SNOC_USB 8 +#define SLAVE_SNOC_USB 9 +#define MASTER_NSSNOC_NSSCC 10 +#define SLAVE_NSSNOC_NSSCC 11 +#define MASTER_NSSNOC_SNOC_0 12 +#define SLAVE_NSSNOC_SNOC_0 13 +#define MASTER_NSSNOC_SNOC_1 14 +#define SLAVE_NSSNOC_SNOC_1 15 +#define MASTER_NSSNOC_ATB 16 +#define SLAVE_NSSNOC_ATB 17 +#define MASTER_NSSNOC_PCNOC_1 18 +#define SLAVE_NSSNOC_PCNOC_1 19 +#define MASTER_NSSNOC_QOSGEN_REF 20 +#define SLAVE_NSSNOC_QOSGEN_REF 21 +#define MASTER_NSSNOC_TIMEOUT_REF 22 +#define SLAVE_NSSNOC_TIMEOUT_REF 23 +#define MASTER_NSSNOC_XO_DCD 24 +#define SLAVE_NSSNOC_XO_DCD 25 + +#define MASTER_NSSNOC_PPE 0 +#define SLAVE_NSSNOC_PPE 1 +#define MASTER_NSSNOC_PPE_CFG 2 +#define SLAVE_NSSNOC_PPE_CFG 3 +#define MASTER_NSSNOC_NSS_CSR 4 +#define SLAVE_NSSNOC_NSS_CSR 5 +#define MASTER_NSSNOC_CE_APB 6 +#define SLAVE_NSSNOC_CE_APB 7 +#define MASTER_NSSNOC_CE_AXI 8 +#define SLAVE_NSSNOC_CE_AXI 9 + +#define MASTER_CNOC_AHB 0 +#define SLAVE_CNOC_AHB 1 + +#endif /* INTERCONNECT_QCOM_IPQ5332_H */ -- cgit v1.2.3 From 
de67763cbdbba8149eeb5d45ad0c6834f7c7b352 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 9 Aug 2024 16:54:06 -0700 Subject: ip: Move INFINITY_LIFE_TIME to addrconf.h. INFINITY_LIFE_TIME is the common value used in IPv4 and IPv6 but defined in both .c files. Also, 0xffffffff used in addrconf_timeout_fixup() is INFINITY_LIFE_TIME. Let's move INFINITY_LIFE_TIME's definition to addrconf.h Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240809235406.50187-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/addrconf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b18e81f0c9e1..c8ed31828db3 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -187,10 +187,12 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) return 0; } +#define INFINITY_LIFE_TIME 0xFFFFFFFF + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { - if (timeout == 0xffffffff) + if (timeout == INFINITY_LIFE_TIME) return ~0UL; /* -- cgit v1.2.3 From 0cb70ee4a6ee6b0a4b0e0f70a64a094c6fe05944 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Aug 2024 13:22:25 +0800 Subject: virtio: rename virtio_config_enabled to virtio_config_core_enabled Following patch will allow the config interrupt to be disabled by a specific driver via another boolean. So this patch renames virtio_config_enabled and relevant helpers to virtio_config_core_enabled. Cc: Venkat Venkatsubra Cc: Gia-Khanh Nguyen Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Link: https://patch.msgid.link/20240814052228.4654-2-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4b16844c6bc2..de6d3cc176d9 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -118,7 +118,7 @@ struct virtio_admin_cmd { * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) - * @config_enabled: configuration change reporting enabled + * @config_core_enabled: configuration change reporting enabled by core * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting * @vqs_list_lock: protects @vqs. @@ -135,7 +135,7 @@ struct virtio_admin_cmd { struct virtio_device { int index; bool failed; - bool config_enabled; + bool config_core_enabled; bool config_change_pending; spinlock_t config_lock; spinlock_t vqs_list_lock; -- cgit v1.2.3 From 224de6f886f8184fd448700a6d78f216694de948 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Aug 2024 13:22:26 +0800 Subject: virtio: allow driver to disable the configure change notification Sometime, it would be useful to disable the configure change notification from the driver. So this patch allows this by introducing a variable config_change_driver_disabled and only allow the configure change notification callback to be triggered when it is allowed by both the virtio core and the driver. It is set to false by default to hold the current semantic so we don't need to change any drivers. The first user for this would be virtio-net. Cc: Venkat Venkatsubra Cc: Gia-Khanh Nguyen Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang Link: https://patch.msgid.link/20240814052228.4654-3-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6d3cc176d9..306137a15d07 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -119,6 +119,8 @@ struct virtio_admin_cmd { * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_core_enabled: configuration change reporting enabled by core + * @config_driver_disabled: configuration change reporting disabled by + * a driver * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting * @vqs_list_lock: protects @vqs. @@ -136,6 +138,7 @@ struct virtio_device { int index; bool failed; bool config_core_enabled; + bool config_driver_disabled; bool config_change_pending; spinlock_t config_lock; spinlock_t vqs_list_lock; @@ -166,6 +169,10 @@ void __virtqueue_break(struct virtqueue *_vq); void __virtqueue_unbreak(struct virtqueue *_vq); void virtio_config_changed(struct virtio_device *dev); + +void virtio_config_driver_disable(struct virtio_device *dev); +void virtio_config_driver_enable(struct virtio_device *dev); + #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); -- cgit v1.2.3 From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 1 Aug 2024 22:47:48 +0200 Subject: mm/hugetlb: fix hugetlb vs. core-mm PT locking We recently made GUP's common page table walking code to also walk hugetlb VMAs without most hugetlb special-casing, preparing for the future of having less hugetlb-specific page table walking code in the codebase. Turns out that we missed one page table locking detail: page table locking for hugetlb folios that are not mapped using a single PMD/PUD. Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the page tables, will perform a pte_offset_map_lock() to grab the PTE table lock. However, hugetlb that concurrently modifies these page tables would actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the locks would differ. Something similar can happen right now with hugetlb folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS. This issue can be reproduced [1], for example triggering: [ 3105.936100] ------------[ cut here ]------------ [ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188 [ 3105.944634] Modules linked in: [...] 
[ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1 [ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024 [ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3105.991108] pc : try_grab_folio+0x11c/0x188 [ 3105.994013] lr : follow_page_pte+0xd8/0x430 [ 3105.996986] sp : ffff80008eafb8f0 [ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43 [ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48 [ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978 [ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001 [ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000 [ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000 [ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0 [ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080 [ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000 [ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000 [ 3106.047957] Call trace: [ 3106.049522] try_grab_folio+0x11c/0x188 [ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0 [ 3106.055527] follow_page_mask+0x1a0/0x2b8 [ 3106.058118] __get_user_pages+0xf0/0x348 [ 3106.060647] faultin_page_range+0xb0/0x360 [ 3106.063651] do_madvise+0x340/0x598 Let's make huge_pte_lockptr() effectively use the same PT locks as any core-mm page table walker would. Add ptep_lockptr() to obtain the PTE page table lock using a pte pointer -- unfortunately we cannot convert pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables we can have with CONFIG_HIGHPTE. Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such that when e.g., CONFIG_PGTABLE_LEVELS==2 with PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document why that works. There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb folio being mapped using two PTE page tables. While hugetlb wants to take the PMD table lock, core-mm would grab the PTE table lock of one of both PTE page tables. In such corner cases, we have to make sure that both locks match, which is (fortunately!) currently guaranteed for 8xx as it does not support SMP and consequently doesn't use split PT locks. 
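For reference, a minimal sketch, not taken from the patch itself, of what the consistent lock selection buys: with the fix, the hugetlb side (via the existing huge_pte_lock() wrapper around huge_pte_lockptr()) and a generic walker doing pte_offset_map_lock() serialize on the same split PTE lock for a PTE-mapped hugetlb folio, instead of hugetlb quietly falling back to mm->page_table_lock:

  /* Illustrative only; error handling and the actual PTE updates are elided. */
  static void sketch_update_hugetlb_pte(struct hstate *h, struct mm_struct *mm,
                                        pte_t *ptep)
  {
          /* for sub-PMD hugetlb folios this now resolves to ptep_lockptr() */
          spinlock_t *ptl = huge_pte_lock(h, mm, ptep);

          /* ... modify the PTEs backing the folio ... */

          spin_unlock(ptl);
  }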
[1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/ Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Peter Xu Cc: Oscar Salvador Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 11 +++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. 
+ */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -- cgit v1.2.3 From f4cb78af91e3b2b7aa76dbf8213b898fa8811b12 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:35 +0000 Subject: mm: add system wide stats items category /proc/vmstat contains events and stats, events can only grow, but stats can grow and shrink. vmstat has the following: ------------------------- NR_VM_ZONE_STAT_ITEMS: per-zone stats NR_VM_NUMA_EVENT_ITEMS: per-numa events NR_VM_NODE_STAT_ITEMS: per-numa stats NR_VM_WRITEBACK_STAT_ITEMS: system-wide background-writeback and dirty-throttling tresholds. NR_VM_EVENT_ITEMS: system-wide events ------------------------- Rename NR_VM_WRITEBACK_STAT_ITEMS to NR_VM_STAT_ITEMS, to track the system-wide stats, we are going to add per-page metadata stats to this category in the next patch. Also delete unused writeback_stat_name(). 
Link: https://lkml.kernel.org/r/20240809191020.1142142-2-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-3-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Suggested-by: Yosry Ahmed Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Cc: Fan Ni Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9ab4fa5e09b5 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,11 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +515,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ -- cgit v1.2.3 From 9d85731110241fb8ca9445ea4177d816041a8825 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:36 +0000 Subject: mm: don't account memmap per-node Fix invalid access to pgdat during hot-remove operation: ndctl users reported a GPF when trying to destroy a namespace: $ ndctl destroy-namespace all -r all -f Segmentation fault dmesg: Oops: general protection fault, probably for non-canonical address 0xdffffc0000005650: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: probably user-memory-access in range [0x000000000002b280-0x000000000002b287] CPU: 26 UID: 0 PID: 1868 Comm: ndctl Not tainted 6.11.0-rc1 #1 Hardware name: Dell Inc. PowerEdge R640/08HT8T, BIOS 2.20.1 09/13/2023 RIP: 0010:mod_node_page_state+0x2a/0x110 cxl-test users report a GPF when trying to unload the test module: $ modrpobe -r cxl-test dmesg BUG: unable to handle page fault for address: 0000000000004200 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 1076 Comm: modprobe Tainted: G O N 6.11.0-rc1 #197 Tainted: [O]=OOT_MODULE, [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/15 RIP: 0010:mod_node_page_state+0x6/0x90 Currently, when memory is hot-plugged or hot-removed the accounting is done based on the assumption that memmap is allocated from the same node as the hot-plugged/hot-removed memory, which is not always the case. 
In addition, there are challenges with keeping the node id of the memory that is being remove to the time when memmap accounting is actually performed: since this is done after remove_pfn_range_from_zone(), and also after remove_memory_block_devices(). Meaning that we cannot use pgdat nor walking though memblocks to get the nid. Given all of that, account the memmap overhead system wide instead. For this we are going to be using global atomic counters, but given that memmap size is rarely modified, and normally is only modified either during early boot when there is only one CPU, or under a hotplug global mutex lock, therefore there is no need for per-cpu optimizations. Also, while we are here rename nr_memmap to nr_memmap_pages, and nr_memmap_boot to nr_memmap_boot_pages to be self explanatory that the units are in page count. [pasha.tatashin@soleen.com: address a few nits from David Hildenbrand] Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-4-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reported-by: Yi Zhang Closes: https://lore.kernel.org/linux-cxl/CAHj4cs9Ax1=CoJkgBGP_+sNu6-6=6v=_L-ZBZY0bVLD3wUWZQg@mail.gmail.com Reported-by: Alison Schofield Closes: https://lore.kernel.org/linux-mm/Zq0tPd2h6alFz8XF@aschofie-mobl2/#t Tested-by: Dan Williams Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Tested-by: Yi Zhang Cc: Domenico Cerasuolo Cc: Fan Ni Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- include/linux/vmstat.h | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9ab4fa5e09b5..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -38,6 +38,8 @@ struct reclaim_stat { enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; @@ -618,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:56 -0700 Subject: alloc_tag: introduce clear_page_tag_ref() helper function In several cases we are freeing pages which were not allocated using common page allocators. 
For such cases, in order to keep allocation accounting correct, we should clear the page tag to indicate that the page being freed is expected to not have a valid allocation tag. Introduce clear_page_tag_ref() helper function to be used for this. Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Pasha Tatashin Cc: Kees Cook Cc: Kent Overstreet Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -- cgit v1.2.3 From 67c3ca2c5cfe6a50772514e3349b5e7b3b0fac03 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:02 +0300 Subject: net: mscc: ocelot: use ocelot_xmit_get_vlan_info() also for FDMA and register injection Problem description ------------------- On an NXP LS1028A (felix DSA driver) with the following configuration: - ocelot-8021q tagging protocol - VLAN-aware bridge (with STP) spanning at least swp0 and swp1 - 8021q VLAN upper interfaces on swp0 and swp1: swp0.700, swp1.700 - ptp4l on swp0.700 and swp1.700 we see that the ptp4l instances do not see each other's traffic, and they all go to the grand master state due to the ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES condition. Jumping to the conclusion for the impatient ------------------------------------------- There is a zero-day bug in the ocelot switchdev driver in the way it handles VLAN-tagged packet injection. The correct logic already exists in the source code, in function ocelot_xmit_get_vlan_info() added by commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). But it is used only for normal NPI-based injection with the DSA "ocelot" tagging protocol. The other injection code paths (register-based and FDMA-based) roll their own wrong logic. This affects and was noticed on the DSA "ocelot-8021q" protocol because it uses register-based injection. By moving ocelot_xmit_get_vlan_info() to a place that's common for both the DSA tagger and the ocelot switch library, it can also be called from ocelot_port_inject_frame() in ocelot.c. 
We need to touch the lines with ocelot_ifh_port_set()'s prototype anyway, so let's rename it to something clearer regarding what it does, and add a kernel-doc. ocelot_ifh_set_basic() should do. Investigation notes ------------------- Debugging reveals that PTP event (aka those carrying timestamps, like Sync) frames injected into swp0.700 (but also swp1.700) hit the wire with two VLAN tags: 00000000: 01 1b 19 00 00 00 00 01 02 03 04 05 81 00 02 bc ~~~~~~~~~~~ 00000010: 81 00 02 bc 88 f7 00 12 00 2c 00 00 02 00 00 00 ~~~~~~~~~~~ 00000020: 00 00 00 00 00 00 00 00 00 00 00 01 02 ff fe 03 00000030: 04 05 00 01 00 04 00 00 00 00 00 00 00 00 00 00 00000040: 00 00 The second (unexpected) VLAN tag makes felix_check_xtr_pkt() -> ptp_classify_raw() fail to see these as PTP packets at the link partner's receiving end, and return PTP_CLASS_NONE (because the BPF classifier is not written to expect 2 VLAN tags). The reason why packets have 2 VLAN tags is because the transmission code treats VLAN incorrectly. Neither ocelot switchdev, nor felix DSA, declare the NETIF_F_HW_VLAN_CTAG_TX feature. Therefore, at xmit time, all VLANs should be in the skb head, and none should be in the hwaccel area. This is done by: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; } But ocelot_port_inject_frame() handles things incorrectly: ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); void ocelot_ifh_port_set(struct sk_buff *skb, void *ifh, int port, u32 rew_op) { (...) if (vlan_tag) ocelot_ifh_set_vlan_tci(ifh, vlan_tag); (...) } The way __vlan_hwaccel_push_inside() pushes the tag inside the skb head is by calling: static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { skb->vlan_present = 0; } which does _not_ zero out skb->vlan_tci as seen by skb_vlan_tag_get(). This means that ocelot, when it calls skb_vlan_tag_get(), sees (and uses) a residual skb->vlan_tci, while the same VLAN tag is _already_ in the skb head. The trivial fix for double VLAN headers is to replace the content of ocelot_ifh_port_set() with: if (skb_vlan_tag_present(skb)) ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb)); but this would not be correct either, because, as mentioned, vlan_hw_offload_capable() is false for us, so we'd be inserting dead code and we'd always transmit packets with VID=0 in the injection frame header. I can't actually test the ocelot switchdev driver and rely exclusively on code inspection, but I don't think traffic from 8021q uppers has ever been injected properly, and not double-tagged. Thus I'm blaming the introduction of VLAN fields in the injection header - early driver code. As hinted at in the early conclusion, what we _want_ to happen for VLAN transmission was already described once in commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). ocelot_xmit_get_vlan_info() intends to ensure that if the port through which we're transmitting is under a VLAN-aware bridge, the outer VLAN tag from the skb head is stripped from there and inserted into the injection frame header (so that the packet is processed in hardware through that actual VLAN). And in all other cases, the packet is sent with VID=0 in the injection frame header, since the port is VLAN-unaware and has logic to strip this VID on egress (making it invisible to the wire). 
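A hedged sketch of how the injection path is expected to consume the now-shared helper (the ocelot.c hunk itself is outside include/ and therefore not shown in this digest); the IFH field setters are the ones the ocelot tagger already uses, and the bridge lookup is left to the caller:

  static void sketch_ifh_fill_vlan(void *ifh, struct sk_buff *skb,
                                   struct net_device *bridge_dev)
  {
          u64 vlan_tci, tag_type;

          /* classify from the VLAN in the skb head, not from a stale
           * skb->vlan_tci left behind by __vlan_hwaccel_push_inside()
           */
          ocelot_xmit_get_vlan_info(skb, bridge_dev, &vlan_tci, &tag_type);

          ocelot_ifh_set_vlan_tci(ifh, vlan_tci);
          ocelot_ifh_set_tag_type(ifh, tag_type);
  }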
Fixes: 08d02364b12f ("net: mscc: fix the injection header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 3 ++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d8..6fbfbde68a37 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include +#include #include #include #include @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c..ed18e6bafc8d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -969,7 +969,8 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, -- cgit v1.2.3 From c5e12ac3beb0dd3a718296b2d8af5528e9ab728e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:04 +0300 Subject: net: mscc: ocelot: serialize access to the injection/extraction groups As explained by Horatiu Vultur in commit 603ead96582d ("net: sparx5: Add spinlock for frame transmission from CPU") which is for a similar hardware design, multiple CPUs can simultaneously perform injection or extraction. 
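A hedged usage sketch of the locking helpers this change declares (see the hunk below); the felix/ocelot call sites themselves live outside include/ and are simplified here:

  static int sketch_xmit_one(struct ocelot *ocelot, int port, int grp,
                             u32 rew_op, struct sk_buff *skb)
  {
          int err = 0;

          ocelot_lock_inj_grp(ocelot, grp);

          if (ocelot_can_inject(ocelot, grp))
                  ocelot_port_inject_frame(ocelot, port, grp, rew_op, skb);
          else
                  err = -EBUSY;

          ocelot_unlock_inj_grp(ocelot, grp);

          return err;
  }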
There are only 2 register groups for injection and 2 for extraction, and the driver only uses one of each. So we'd better serialize access using spin locks, otherwise frame corruption is possible. Note that unlike in sparx5, FDMA in ocelot does not have this issue because struct ocelot_fdma_tx_ring already contains an xmit_lock. I guess this is mostly a problem for NXP LS1028A, as that is dual core. I don't think VSC7514 is. So I'm blaming the commit where LS1028A (aka the felix DSA driver) started using register-based packet injection and extraction. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ed18e6bafc8d..462c653e1017 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,6 +969,12 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -- cgit v1.2.3 From 93e4649efa964201c73b0a03c35c04a0d6fc809f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:05 +0300 Subject: net: dsa: provide a software untagging function on RX for VLAN-aware bridges Through code analysis, I realized that the ds->untag_bridge_pvid logic is contradictory - see the newly added FIXME above the kernel-doc for dsa_software_untag_vlan_unaware_bridge(). Moreover, for the Felix driver, I need something very similar, but which is actually _not_ contradictory: untag the bridge PVID on RX, but for VLAN-aware bridges. The existing logic does it for VLAN-unaware bridges. Since I don't want to change the functionality of drivers which were supposedly properly tested with the ds->untag_bridge_pvid flag, I have introduced a new one: ds->untag_vlan_aware_bridge_pvid, and I have refactored the DSA reception code into a common path for both flags. TODO: both flags should be unified under a single ds->software_vlan_untag, which users of both current flags should set. This is not something that can be carried out right away. It needs very careful examination of all drivers which make use of this functionality, since some of them actually get this wrong in the first place. For example, commit 9130c2d30c17 ("net: dsa: microchip: ksz8795: Use software untagging on CPU port") uses this in a driver which has ds->configure_vlan_while_not_filtering = true. The latter mechanism has been known for many years to be broken by design: https://lore.kernel.org/netdev/CABumfLzJmXDN_W-8Z=p9KyKUVi_HhS7o_poBkeKHS2BkAiyYpw@mail.gmail.com/ and we have the situation of 2 bugs canceling each other. 
There is no private VLAN, and the port follows the PVID of the VLAN-unaware bridge. So, it's kinda ok for that driver to use the ds->untag_bridge_pvid mechanism, in a broken way. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1..d7a6c2930277 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. -- cgit v1.2.3 From 36dd1141be70b5966906919714dc504a24c65ddf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/soc/mscc/ocelot_vcap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { -- cgit v1.2.3 From 3942bb49728ad9e1f94d953a88af169a8f5d8099 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 Aug 2024 13:00:34 +0300 Subject: string: add mem_is_zero() helper to check if memory area is all zeros Almost two thirds of the memchr_inv() usages check if the memory area is all zeros, with no interest in where in the buffer the first non-zero byte is located. Checking for !memchr_inv(s, 0, n) is also not very intuitive or discoverable. Add an explicit mem_is_zero() helper for this use case. Reviewed-by: Kees Cook Reviewed-by: Andy Shevchenko Link: https://patchwork.freedesktop.org/patch/msgid/20240814100035.3100852-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/linux/string.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..5855c5626b4b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -279,6 +279,18 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *str, char old, char new); +/** + * mem_is_zero - Check if an area of memory is all 0's. + * @s: The memory area + * @n: The size of the area + * + * Return: True if the area of memory is all 0's. + */ +static inline bool mem_is_zero(const void *s, size_t n) +{ + return !memchr_inv(s, 0, n); +} + extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; -- cgit v1.2.3 From 6e6f58a170ea98e44075b761f2da42a5aec47dfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Aug 2024 16:29:11 +0200 Subject: thermal: gov_bang_bang: Use governor_data to reduce overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After running once, the for_each_trip_desc() loop in bang_bang_manage() is pure needless overhead because it is not going to make any changes unless a new cooling device has been bound to one of the trips in the thermal zone or the system is resuming from sleep. For this reason, make bang_bang_manage() set governor_data for the thermal zone and check it upfront to decide whether or not it needs to do anything. However, governor_data needs to be reset in some cases to let bang_bang_manage() know that it should walk the trips again, so add an .update_tz() callback to the governor and make the core additionally invoke it during system resume. 
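A hedged sketch of the pattern described above; the actual governor diff is outside include/, so everything here other than the governor_data field is illustrative:

  static void sketch_bang_bang_manage(struct thermal_zone_device *tz)
  {
          if (tz->governor_data)          /* trips already walked once */
                  return;

          /* ... walk the trips and set up the cooling instances ... */

          tz->governor_data = (void *)true;
  }

  /* the governor's .update_tz() is expected to clear governor_data when
   * notified of system resume, so the walk runs again afterwards
   */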
To avoid affecting the other users of that callback unnecessarily, add a special notification reason for system resume, THERMAL_TZ_RESUME, and also pass it to __thermal_zone_device_update() called during system resume for consistency. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Kästle Reviewed-by: Zhang Rui Cc: 6.10+ # 6.10+ Link: https://patch.msgid.link/2285575.iZASKD2KPV@rjwysocki.net --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..b86ddca46b9e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** -- cgit v1.2.3 From bf5ffc8c80e0cf5205849cd0c9c3cb261d2beee6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:18 -0600 Subject: perf: arm_pmu: Remove event index to counter remapping Xscale and Armv6 PMUs defined the cycle counter at 0 and event counters starting at 1 and had 1:1 event index to counter numbering. On Armv7 and later, this changed the cycle counter to 31 and event counters start at 0. The drivers for Armv7 and PMUv3 kept the old event index numbering and introduced an event index to counter conversion. The conversion uses masking to convert from event index to a counter number. This operation relies on having at most 32 counters so that the cycle counter index 0 can be transformed to counter number 31. Armv9.4 adds support for an additional fixed function counter (instructions) which increases possible counters to more than 32, and the conversion won't work anymore as a simple subtract and mask. The primary reason for the translation (other than history) seems to be to have a contiguous mask of counters 0-N. Keeping that would result in more complicated index to counter conversions. Instead, store a mask of available counters rather than just number of events. That provides more information in addition to the number of events. No (intended) functional changes. 
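A rough sketch of what the mask representation enables (illustrative code, not a hunk from this patch): driver code can walk the implemented counters directly with the bitmap iterators instead of translating event indices through a subtract-and-mask conversion. The helper name is hypothetical.

#include <linux/bitmap.h>
#include <linux/perf/arm_pmu.h>

/* Hypothetical helper: operate on every counter this PMU implements. */
static void example_for_each_counter(struct arm_pmu *cpu_pmu)
{
	int idx;

	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
		/* program, reset or read hardware counter 'idx' here */
	}
}
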
Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-1-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 +- include/linux/perf/arm_pmuv3.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index b3b34f6670cf..e5d6d204beab 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -96,7 +96,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - int num_events; + DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 7867db04ec98..eccbdd8eb98f 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -6,6 +6,7 @@ #ifndef __PERF_ARM_PMUV3_H #define __PERF_ARM_PMUV3_H +#define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) -- cgit v1.2.3 From a4a6e2078d85a9d94bcc7eab77845cb8cd39f680 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:19 -0600 Subject: perf: arm_pmuv3: Prepare for more than 32 counters Various PMUv3 registers which are a mask of counters are 64-bit registers, but the accessor functions take a u32. This has been fine as the upper 32-bits have been RES0 as there has been a maximum of 32 counters prior to Armv9.4/8.9. With Armv9.4/8.9, a 33rd counter is added. Update the accessor functions to use a u64 instead. Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-2-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 35d4ca4f6122..334d7c5503cf 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -19,8 +19,8 @@ struct kvm_pmc { }; struct kvm_pmu_events { - u32 events_host; - u32 events_guest; + u64 events_host; + u64 events_guest; }; struct kvm_pmu { -- cgit v1.2.3 From f9b11aa00708d94a0cd78bfde34b68c0f95d8b50 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:21 -0600 Subject: KVM: arm64: pmu: Use generated define for PMSELR_EL0.SEL access ARMV8_PMU_COUNTER_MASK is really a mask for the PMSELR_EL0.SEL register field. Make that clear by adding a standard sysreg definition for the register, and using it instead. 
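Conceptually, the change trades a mask derived from the counter count for a named register field. The snippet below is purely illustrative; EXAMPLE_PMSELR_SEL is a made-up stand-in for the generated sysreg field definition, not a real kernel macro:

#include <linux/bitfield.h>
#include <linux/bits.h>

#define EXAMPLE_PMSELR_SEL	GENMASK(4, 0)	/* stand-in for the generated SEL field */

static inline u32 example_encode_pmselr(u32 counter)
{
	/* was, conceptually: counter & ARMV8_PMU_COUNTER_MASK */
	return FIELD_PREP(EXAMPLE_PMSELR_SEL, counter);
}
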
Reviewed-by: Mark Rutland Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-4-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmuv3.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index eccbdd8eb98f..792b8e10b72a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -8,7 +8,6 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) /* * Common architectural and microarchitectural event numbers. -- cgit v1.2.3 From 126d7d7cce5e048fb82477a9842d088d10ff0df6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:22 -0600 Subject: arm64: perf/kvm: Use a common PMU cycle counter define The PMUv3 and KVM code each have a define for the PMU cycle counter index. Move KVM's define to a shared location and use it for PMUv3 driver. Reviewed-by: Marc Zyngier Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-5-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 1 - include/linux/perf/arm_pmuv3.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 334d7c5503cf..871067fb2616 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -10,7 +10,6 @@ #include #include -#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM) struct kvm_pmc { diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 792b8e10b72a..f4ec76f725a3 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -9,6 +9,9 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_CYCLE_IDX 31 + + /* * Common architectural and microarchitectural event numbers. */ -- cgit v1.2.3 From 2f62701fa5b0ee94c68d2fcfc470d08aef195441 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:23 -0600 Subject: KVM: arm64: Refine PMU defines for number of counters There are 2 defines for the number of PMU counters: ARMV8_PMU_MAX_COUNTERS and ARMPMU_MAX_HWEVENTS. Both are the same currently, but Armv9.4/8.9 increases the number of possible counters from 32 to 33. With this change, the maximum number of counters will differ for KVM's PMU emulation which is PMUv3.4. Give KVM PMU emulation its own define to decouple it from the rest of the kernel's number PMU counters. The VHE PMU code needs to match the PMU driver, so switch it to use ARMPMU_MAX_HWEVENTS instead. 
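A hedged build-time check, not part of the patch, that captures the relationship the split leaves implicit: the emulated PMU's fixed limit should not exceed what the host driver can track.

#include <linux/build_bug.h>
#include <linux/perf/arm_pmu.h>
#include <kvm/arm_pmu.h>

static_assert(KVM_ARMV8_PMU_MAX_COUNTERS <= ARMPMU_MAX_HWEVENTS,
	      "KVM's emulated PMUv3 cannot expose more counters than the host driver tracks");
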
Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-6-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/kvm/arm_pmu.h | 3 ++- include/linux/perf/arm_pmuv3.h | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 871067fb2616..e08aeec5d936 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -10,6 +10,7 @@ #include #include +#define KVM_ARMV8_PMU_MAX_COUNTERS 32 #if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM) struct kvm_pmc { @@ -25,7 +26,7 @@ struct kvm_pmu_events { struct kvm_pmu { struct irq_work overflow_work; struct kvm_pmu_events events; - struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; + struct kvm_pmc pmc[KVM_ARMV8_PMU_MAX_COUNTERS]; int irq_num; bool created; bool irq_level; diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index f4ec76f725a3..4f7a7f2222e5 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -7,8 +7,6 @@ #define __PERF_ARM_PMUV3_H #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 -#define ARMV8_PMU_MAX_COUNTERS 32 - #define ARMV8_PMU_CYCLE_IDX 31 -- cgit v1.2.3 From d8226d8cfbaf5eb9771af8ad8b4e58697e2ffb74 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 10:51:24 -0600 Subject: perf: arm_pmuv3: Add support for Armv9.4 PMU instruction counter Armv9.4/8.9 PMU adds optional support for a fixed instruction counter similar to the fixed cycle counter. Support for the feature is indicated in the ID_AA64DFR1_EL1 register PMICNTR field. The counter is not accessible in AArch32. Existing userspace using direct counter access won't know how to handle the fixed instruction counter, so we have to avoid using the counter when user access is requested. Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Tested-by: James Clark Link: https://lore.kernel.org/r/20240731-arm-pmu-3-9-icntr-v3-7-280a8d7ff465@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 8 ++++++-- include/linux/perf/arm_pmuv3.h | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e5d6d204beab..4b5b83677e3f 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -17,10 +17,14 @@ #ifdef CONFIG_ARM_PMU /* - * The ARMv7 CPU PMU supports up to 32 event counters. + * The Armv7 and Armv8.8 or less CPU PMU supports up to 32 event counters. + * The Armv8.9/9.4 CPU PMU supports up to 33 event counters. */ +#ifdef CONFIG_ARM #define ARMPMU_MAX_HWEVENTS 32 - +#else +#define ARMPMU_MAX_HWEVENTS 33 +#endif /* * ARM PMU hw_event flags */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 4f7a7f2222e5..3372c1b56486 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -8,7 +8,7 @@ #define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 #define ARMV8_PMU_CYCLE_IDX 31 - +#define ARMV8_PMU_INSTR_IDX 32 /* Not accessible from AArch32 */ /* * Common architectural and microarchitectural event numbers. 
@@ -228,8 +228,10 @@ */ #define ARMV8_PMU_OVSR_P GENMASK(30, 0) #define ARMV8_PMU_OVSR_C BIT(31) +#define ARMV8_PMU_OVSR_F BIT_ULL(32) /* arm64 only */ /* Mask for writable bits is both P and C fields */ -#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C | \ + ARMV8_PMU_OVSR_F) /* * PMXEVTYPER: Event selection reg -- cgit v1.2.3 From 2140e63cd87fa707acf514d594725097bed018fd Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 12 Aug 2024 09:30:44 +0200 Subject: ethtool: Add new result codes for TDR diagnostics Add new result codes to support TDR diagnostics in preparation for Open Alliance 1000BaseT1 TDR support: - ETHTOOL_A_CABLE_RESULT_CODE_NOISE: TDR not possible due to high noise level. - ETHTOOL_A_CABLE_RESULT_CODE_RESOLUTION_NOT_POSSIBLE: TDR resolution not possible / out of distance. Reviewed-by: Andrew Lunn Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20240812073046.1728288-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 93c57525a975..9074fa309bd6 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -556,6 +556,10 @@ enum { * a regular 100 Ohm cable and a part with the abnormal impedance value */ ETHTOOL_A_CABLE_RESULT_CODE_IMPEDANCE_MISMATCH, + /* TDR not possible due to high noise level */ + ETHTOOL_A_CABLE_RESULT_CODE_NOISE, + /* TDR resolution not possible / out of distance */ + ETHTOOL_A_CABLE_RESULT_CODE_RESOLUTION_NOT_POSSIBLE, }; enum { -- cgit v1.2.3 From f40a455d01f80c6638be382d75cb1c4e7748d8af Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 13 Aug 2024 14:33:47 +0100 Subject: ipv6: Add ipv6_addr_{cpu_to_be32,be32_to_cpu} helpers Add helpers to convert an ipv6 addr, expressed as an array of words, from CPU to big-endian byte order, and vice versa. No functional change intended. Compile tested only. Suggested-by: Andrew Lunn Link: https://lore.kernel.org/netdev/c7684349-535c-45a4-9a74-d47479a50020@lunn.ch/ Reviewed-by: Andrew Lunn Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240813-ipv6_addr-helpers-v2-1-5c974f8cca3e@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 88a8e554f7a1..e7113855a10f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1365,4 +1365,16 @@ static inline void ip6_sock_set_recvpktinfo(struct sock *sk) release_sock(sk); } +#define IPV6_ADDR_WORDS 4 + +static inline void ipv6_addr_cpu_to_be32(__be32 *dst, const u32 *src) +{ + cpu_to_be32_array(dst, src, IPV6_ADDR_WORDS); +} + +static inline void ipv6_addr_be32_to_cpu(u32 *dst, const __be32 *src) +{ + be32_to_cpu_array(dst, src, IPV6_ADDR_WORDS); +} + #endif /* _NET_IPV6_H */ -- cgit v1.2.3 From 1fc2ac428ef7d2ab9e8e19efe7ec3e58aea51bf3 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 16 Aug 2024 12:15:23 -0600 Subject: io_uring: fix user_data field name in comment io_uring_cqe's user_data field refers to `sqe->data`, but io_uring_sqe does not have a data field. Fix the comment to say `sqe->user_data`. 
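A hedged userspace sketch (using liburing, error handling omitted) of the round-trip the corrected comment documents: whatever is placed in sqe->user_data comes back verbatim in cqe->user_data.

#include <liburing.h>

int example_user_data_roundtrip(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	io_uring_queue_init(8, &ring, 0);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	sqe->user_data = 0x1234;	/* or io_uring_sqe_set_data64(sqe, 0x1234) */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	/* cqe->user_data == 0x1234 here */
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}
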
Signed-off-by: Caleb Sander Mateos Link: https://github.com/axboe/liburing/pull/1206 Link: https://lore.kernel.org/r/20240816181526.3642732-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac..adc2524fd8e3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; -- cgit v1.2.3 From f03e94f23b04c2b71c0044c1534921b3975ef10c Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 13 Aug 2024 13:34:10 +0800 Subject: scsi: core: Fix the return value of scsi_logical_block_count() scsi_logical_block_count() should return the block count of a given SCSI command. The original implementation ended up shifting twice, leading to an incorrect count being returned. Fix the conversion between bytes and logical blocks. Cc: stable@vger.kernel.org Fixes: 6a20e21ae1e2 ("scsi: core: Add helper to return number of logical blocks in a request") Signed-off-by: Chaotian Jing Link: https://lore.kernel.org/r/20240813053534.7720-1-chaotian.jing@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 45c40d200154..8ecfb94049db 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -234,7 +234,7 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd) { - unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT; + unsigned int shift = ilog2(scmd->device->sector_size); return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift; } -- cgit v1.2.3 From cd06b713a6880997ca5aecac8e33d5f9c541749e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:10 +0530 Subject: scsi: ufs: core: Add a quirk for handling broken LSDBS field in controller capabilities register 'Legacy Queue & Single Doorbell Support (LSDBS)' field in the controller capabilities register is supposed to report whether the legacy single doorbell mode is supported in the controller or not. But some controllers report '1' in this field which corresponds to 'LSDB not supported', but they indeed support LSDB. So let's add a quirk to handle those controllers. If the quirk is enabled by the controller driver, then LSDBS register field will be ignored and legacy single doorbell mode is assumed to be enabled always. Tested-by: Amit Pundir Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-1-e6fe0e18e2a3@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cac0cdb9a916..0fd2aebac728 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -676,6 +676,14 @@ enum ufshcd_quirks { * the standard best practice for managing keys). 
*/ UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, + + /* + * This quirk indicates that the controller reports the value 1 (not + * supported) in the Legacy Single DoorBell Support (LSDBS) bit of the + * Controller Capabilities register although it supports the legacy + * single doorbell mode. + */ + UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, }; enum ufshcd_caps { -- cgit v1.2.3 From fca5cb4dd2b4a9423cb6d112cc71c33899955a1f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 Aug 2024 14:20:55 +0800 Subject: Revert "lib/mpi: Extend the MPI library" This partially reverts commit a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e. Most of it is no longer needed since sm2 has been removed. However, the following functions have been kept as they have developed other uses: mpi_copy mpi_mod mpi_test_bit mpi_set_bit mpi_rshift mpi_add mpi_sub mpi_addm mpi_subm mpi_mul mpi_mulm mpi_tdiv_r mpi_fdiv_r Signed-off-by: Herbert Xu --- include/linux/mpi.h | 65 ----------------------------------------------------- 1 file changed, 65 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 89b720893e12..e081428b91ef 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -40,87 +40,33 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; #define mpi_get_nlimbs(a) ((a)->nlimbs) -#define mpi_has_sign(a) ((a)->sign) /*-- mpiutil.c --*/ MPI mpi_alloc(unsigned nlimbs); -void mpi_clear(MPI a); void mpi_free(MPI a); int mpi_resize(MPI a, unsigned nlimbs); -static inline MPI mpi_new(unsigned int nbits) -{ - return mpi_alloc((nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB); -} - MPI mpi_copy(MPI a); -MPI mpi_alloc_like(MPI a); -void mpi_snatch(MPI w, MPI u); -MPI mpi_set(MPI w, MPI u); -MPI mpi_set_ui(MPI w, unsigned long u); -MPI mpi_alloc_set_ui(unsigned long u); -void mpi_swap_cond(MPI a, MPI b, unsigned long swap); - -/* Constants used to return constant MPIs. See mpi_init if you - * want to add more constants. - */ -#define MPI_NUMBER_OF_CONSTANTS 6 -enum gcry_mpi_constants { - MPI_C_ZERO, - MPI_C_ONE, - MPI_C_TWO, - MPI_C_THREE, - MPI_C_FOUR, - MPI_C_EIGHT -}; - -MPI mpi_const(enum gcry_mpi_constants no); /*-- mpicoder.c --*/ - -/* Different formats of external big integer representation. */ -enum gcry_mpi_format { - GCRYMPI_FMT_NONE = 0, - GCRYMPI_FMT_STD = 1, /* Twos complement stored without length. */ - GCRYMPI_FMT_PGP = 2, /* As used by OpenPGP (unsigned only). */ - GCRYMPI_FMT_SSH = 3, /* As used by SSH (like STD but with length). */ - GCRYMPI_FMT_HEX = 4, /* Hex format. */ - GCRYMPI_FMT_USG = 5, /* Like STD but unsigned. */ - GCRYMPI_FMT_OPAQUE = 8 /* Opaque format (some functions only). */ -}; - MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); -int mpi_fromstr(MPI val, const char *str); -MPI mpi_scanval(const char *string); MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); -int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, - size_t buflen, size_t *nwritten, MPI a); /*-- mpi-mod.c --*/ void mpi_mod(MPI rem, MPI dividend, MPI divisor); -/* Context used with Barrett reduction. 
*/ -struct barrett_ctx_s; -typedef struct barrett_ctx_s *mpi_barrett_t; - -mpi_barrett_t mpi_barrett_init(MPI m, int copy); -void mpi_barrett_free(mpi_barrett_t ctx); -void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx); -void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx); - /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); /*-- mpi-cmp.c --*/ int mpi_cmp_ui(MPI u, ulong v); int mpi_cmp(MPI u, MPI v); -int mpi_cmpabs(MPI u, MPI v); /*-- mpi-sub-ui.c --*/ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); @@ -130,16 +76,9 @@ void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); int mpi_test_bit(MPI a, unsigned int n); void mpi_set_bit(MPI a, unsigned int n); -void mpi_set_highbit(MPI a, unsigned int n); -void mpi_clear_highbit(MPI a, unsigned int n); -void mpi_clear_bit(MPI a, unsigned int n); -void mpi_rshift_limbs(MPI a, unsigned int count); void mpi_rshift(MPI x, MPI a, unsigned int n); -void mpi_lshift_limbs(MPI a, unsigned int count); -void mpi_lshift(MPI x, MPI a, unsigned int n); /*-- mpi-add.c --*/ -void mpi_add_ui(MPI w, MPI u, unsigned long v); void mpi_add(MPI w, MPI u, MPI v); void mpi_sub(MPI w, MPI u, MPI v); void mpi_addm(MPI w, MPI u, MPI v, MPI m); @@ -152,10 +91,6 @@ void mpi_mulm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-div.c --*/ void mpi_tdiv_r(MPI rem, MPI num, MPI den); void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); -void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); - -/*-- mpi-inv.c --*/ -int mpi_invm(MPI x, MPI a, MPI n); /* inline functions */ -- cgit v1.2.3 From 8e3a67f2de87ee94ac11ea69beb4edc2993b17a0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 Aug 2024 14:20:57 +0800 Subject: crypto: lib/mpi - Add error checks to extension The remaining functions added by commit a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e did not check for memory allocation errors. Add the checks and change the API to allow errors to be returned. 
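A hedged sketch of the new calling convention: the arithmetic helpers now return int, so an allocation failure inside them can be propagated instead of being silently ignored.

#include <linux/mpi.h>

static int example_modular_add(MPI res, MPI a, MPI b, MPI m)
{
	int ret;

	ret = mpi_addm(res, a, b, m);	/* returned void before this change */
	if (ret)
		return ret;		/* e.g. -ENOMEM from an internal resize */

	return 0;
}
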
Fixes: a8ea8bdd9df9 ("lib/mpi: Extend the MPI library") Signed-off-by: Herbert Xu --- include/linux/mpi.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index e081428b91ef..47be46f36435 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -59,7 +59,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); /*-- mpi-mod.c --*/ -void mpi_mod(MPI rem, MPI dividend, MPI divisor); +int mpi_mod(MPI rem, MPI dividend, MPI divisor); /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); @@ -75,22 +75,22 @@ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); int mpi_test_bit(MPI a, unsigned int n); -void mpi_set_bit(MPI a, unsigned int n); -void mpi_rshift(MPI x, MPI a, unsigned int n); +int mpi_set_bit(MPI a, unsigned int n); +int mpi_rshift(MPI x, MPI a, unsigned int n); /*-- mpi-add.c --*/ -void mpi_add(MPI w, MPI u, MPI v); -void mpi_sub(MPI w, MPI u, MPI v); -void mpi_addm(MPI w, MPI u, MPI v, MPI m); -void mpi_subm(MPI w, MPI u, MPI v, MPI m); +int mpi_add(MPI w, MPI u, MPI v); +int mpi_sub(MPI w, MPI u, MPI v); +int mpi_addm(MPI w, MPI u, MPI v, MPI m); +int mpi_subm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-mul.c --*/ -void mpi_mul(MPI w, MPI u, MPI v); -void mpi_mulm(MPI w, MPI u, MPI v, MPI m); +int mpi_mul(MPI w, MPI u, MPI v); +int mpi_mulm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-div.c --*/ -void mpi_tdiv_r(MPI rem, MPI num, MPI den); -void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); +int mpi_tdiv_r(MPI rem, MPI num, MPI den); +int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); /* inline functions */ -- cgit v1.2.3 From abc158c82ae555078aa5dd2d8407c3df0f868904 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 May 2024 10:55:59 +0200 Subject: sched: Prepare generic code for delayed dequeue While most of the delayed dequeue code can be done inside the sched_class itself, there is one location where we do not have an appropriate hook, namely ttwu_runnable(). Add an ENQUEUE_DELAYED call to the on_rq path to deal with waking delayed dequeue tasks. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105029.200000445@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c1b4ee3234f..f4a648e739e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -544,6 +544,7 @@ struct sched_entity { struct list_head group_node; unsigned int on_rq; + unsigned int sched_delayed; u64 exec_start; u64 sum_exec_runtime; -- cgit v1.2.3 From a1c446611e31ca5363d4db51e398271da1dce0af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Jul 2024 21:30:09 +0200 Subject: sched,freezer: Mark TASK_FROZEN special The special task states are those that do not suffer spurious wakeups, TASK_FROZEN is very much one of those, mark it as such. 
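For illustration only (not the freezer's actual code path), states covered by is_special_task_state() are expected to be entered through set_special_state() rather than set_current_state(); marking TASK_FROZEN special makes that pattern the documented one for it.

static void example_enter_frozen(void)
{
	/* Special states are set under pi_lock via set_special_state(), so a
	 * concurrent wakeup cannot observe a half-done state transition. */
	set_special_state(TASK_FROZEN);
	schedule();
}
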
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105029.998329901@infradead.org --- include/linux/sched.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4a648e739e1..8a3a389bd623 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -149,8 +149,9 @@ struct user_event_mm; * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). */ -#define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) +#define is_special_task_state(state) \ + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ + TASK_DEAD | TASK_FROZEN)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ -- cgit v1.2.3 From 82e9d0456e06cebe2c89f3c73cdbc9e3805e9437 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 31 May 2024 15:49:40 +0200 Subject: sched/fair: Avoid re-setting virtual deadline on 'migrations' During OSPM24 Youssef noted that migrations are re-setting the virtual deadline. Notably everything that does a dequeue-enqueue, like setting nice, changing preferred numa-node, and a myriad of other random crap, will cause this to happen. This shouldn't be. Preserve the relative virtual deadline across such dequeue/enqueue cycles. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.625119246@infradead.org --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a3a389bd623..d25e1cfd5766 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -544,8 +544,10 @@ struct sched_entity { u64 min_vruntime; struct list_head group_node; - unsigned int on_rq; - unsigned int sched_delayed; + unsigned char on_rq; + unsigned char sched_delayed; + unsigned char rel_deadline; + /* hole */ u64 exec_start; u64 sum_exec_runtime; -- cgit v1.2.3 From 857b158dc5e81c6de795ef6be006eed146098fc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 May 2023 13:46:30 +0200 Subject: sched/eevdf: Use sched_attr::sched_runtime to set request/slice suggestion Allow applications to directly set a suggested request/slice length using sched_attr::sched_runtime. The implementation clamps the value to: 0.1[ms] <= slice <= 100[ms] which is 1/10 the size of HZ=1000 and 10 times the size of HZ=100. Applications should strive to use their periodic runtime at a high confidence interval (95%+) as the target slice. Using a smaller slice will introduce undue preemptions, while using a larger value will increase latency. For all the following examples assume a scheduling quantum of 8, and for consistency all examples have W=4: {A,B,C,D}(w=1,r=8): ABCD... +---+---+---+--- t=0, V=1.5 t=1, V=3.5 A |------< A |------< B |------< B |------< C |------< C |------< D |------< D |------< ---+*------+-------+--- ---+--*----+-------+--- t=2, V=5.5 t=3, V=7.5 A |------< A |------< B |------< B |------< C |------< C |------< D |------< D |------< ---+----*--+-------+--- ---+------*+-------+--- Note: 4 identical tasks in FIFO order ~~~ {A,B}(w=1,r=16) C(w=2,r=16) AACCBBCC... 
+---+---+---+--- t=0, V=1.25 t=2, V=5.25 A |--------------< A |--------------< B |--------------< B |--------------< C |------< C |------< ---+*------+-------+--- ---+----*--+-------+--- t=4, V=8.25 t=6, V=12.25 A |--------------< A |--------------< B |--------------< B |--------------< C |------< C |------< ---+-------*-------+--- ---+-------+---*---+--- Note: 1 heavy task -- because q=8, double r such that the deadline of the w=2 task doesn't go below q. Note: observe the full schedule becomes: W*max(r_i/w_i) = 4*2q = 8q in length. Note: the period of the heavy task is half the full period at: W*(r_i/w_i) = 4*(2q/2) = 4q ~~~ {A,C,D}(w=1,r=16) B(w=1,r=8): BAACCBDD... +---+---+---+--- t=0, V=1.5 t=1, V=3.5 A |--------------< A |---------------< B |------< B |------< C |--------------< C |--------------< D |--------------< D |--------------< ---+*------+-------+--- ---+--*----+-------+--- t=3, V=7.5 t=5, V=11.5 A |---------------< A |---------------< B |------< B |------< C |--------------< C |--------------< D |--------------< D |--------------< ---+------*+-------+--- ---+-------+--*----+--- t=6, V=13.5 A |---------------< B |------< C |--------------< D |--------------< ---+-------+----*--+--- Note: 1 short task -- again double r so that the deadline of the short task won't be below q. Made B short because its not the leftmost task, but is eligible with the 0,1,2,3 spread. Note: like with the heavy task, the period of the short task observes: W*(r_i/w_i) = 4*(1q/1) = 4q ~~~ A(w=1,r=16) B(w=1,r=8) C(w=2,r=16) BCCAABCC... +---+---+---+--- t=0, V=1.25 t=1, V=3.25 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+*------+-------+--- ---+--*----+-------+--- t=3, V=7.25 t=5, V=11.25 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+------*+-------+--- ---+-------+--*----+--- t=6, V=13.25 A |--------------< B |------< C |------< ---+-------+----*--+--- Note: 1 heavy and 1 short task -- combine them all. Note: both the short and heavy task end up with a period of 4q ~~~ A(w=1,r=16) B(w=2,r=16) C(w=1,r=8) BBCAABBC... 
+---+---+---+--- t=0, V=1 t=2, V=5 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+*------+-------+--- ---+----*--+-------+--- t=3, V=7 t=5, V=11 A |--------------< A |--------------< B |------< B |------< C |------< C |------< ---+------*+-------+--- ---+-------+--*----+--- t=7, V=15 A |--------------< B |------< C |------< ---+-------+------*+--- Note: as before but permuted ~~~ From all this it can be deduced that, for the steady state: - the total period (P) of a schedule is: W*max(r_i/w_i) - the average period of a task is: W*(r_i/w_i) - each task obtains the fair share: w_i/W of each full period P Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.842834421@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25e1cfd5766..89a3d8d94e96 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,6 +547,7 @@ struct sched_entity { unsigned char on_rq; unsigned char sched_delayed; unsigned char rel_deadline; + unsigned char custom_slice; /* hole */ u64 exec_start; -- cgit v1.2.3 From aef6987d89544d63a47753cf3741cabff0b5574c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Jun 2024 13:16:49 +0200 Subject: sched/eevdf: Propagate min_slice up the cgroup hierarchy In the absence of an explicit cgroup slice configureation, make mixed slice length work with cgroups by propagating the min_slice up the hierarchy. This ensures the cgroup entity gets timely service to service its entities that have this timing constraint set on them. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20240727105030.948188417@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 89a3d8d94e96..3709dedbab59 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -542,6 +542,7 @@ struct sched_entity { struct rb_node run_node; u64 deadline; u64 min_vruntime; + u64 min_slice; struct list_head group_node; unsigned char on_rq; -- cgit v1.2.3 From b0b228bb8d546edd23e2467ed7669d4ce62ae763 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 17 Aug 2024 17:33:34 +0800 Subject: ALSA: seq: Remove unused declarations These functions are never implemented and used. 
Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20240817093334.1120002-1-yuehaibing@huawei.com Signed-off-by: Takashi Iwai --- include/sound/seq_kernel.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h index c8621671fa70..00c32eed2124 100644 --- a/include/sound/seq_kernel.h +++ b/include/sound/seq_kernel.h @@ -86,10 +86,6 @@ static inline size_t snd_seq_event_packet_size(struct snd_seq_event *ev) /* interface for OSS emulation */ int snd_seq_set_queue_tempo(int client, struct snd_seq_queue_tempo *tempo); -/* port callback routines */ -void snd_port_init_callback(struct snd_seq_port_callback *p); -struct snd_seq_port_callback *snd_port_alloc_callback(void); - /* port attach/detach */ int snd_seq_event_port_attach(int client, struct snd_seq_port_callback *pcbp, int cap, int type, int midi_channels, int midi_voices, char *portname); -- cgit v1.2.3 From 37745918e0e7575bc40f38da93a99b9fa6406224 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Aug 2024 13:07:00 +0100 Subject: ALSA: timer: Introduce virtual userspace-driven timers Implement two ioctl calls in order to support virtual userspace-driven ALSA timers. The first ioctl is SNDRV_TIMER_IOCTL_CREATE, which gets the snd_timer_uinfo struct as a parameter and puts a file descriptor of a virtual timer into the `fd` field of the snd_timer_unfo structure. It also updates the `id` field of the snd_timer_uinfo struct, which provides a unique identifier for the timer (basically, the subdevice number which can be used when creating timer instances). This patch also introduces a tiny id allocator for the userspace-driven timers, which guarantees that we don't have more than 128 of them in the system. Another ioctl is SNDRV_TIMER_IOCTL_TRIGGER, which allows us to trigger the virtual timer (and calls snd_timer_interrupt for the timer under the hood), causing all of the timer instances binded to this timer to execute their callbacks. The maximum amount of ticks available for the timer is 1 for the sake of simplicity of the userspace API. 'start', 'stop', 'open' and 'close' callbacks for the userspace-driven timers are empty since we don't really do any hardware initialization here. Suggested-by: Axel Holzinger Signed-off-by: Ivan Orlov Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240813120701.171743-4-ivan.orlov0322@gmail.com --- include/uapi/sound/asound.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 8bf7e8a0eb6f..4cd513215bcd 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -869,7 +869,7 @@ struct snd_ump_block_info { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -894,6 +894,7 @@ enum { #define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ #define SNDRV_TIMER_GLOBAL_HPET 2 #define SNDRV_TIMER_GLOBAL_HRTIMER 3 +#define SNDRV_TIMER_GLOBAL_UDRIVEN 4 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ @@ -974,6 +975,18 @@ struct snd_timer_status { }; #endif +/* + * This structure describes the userspace-driven timer. Such timers are purely virtual, + * and can only be triggered from software (for instance, by userspace application). 
+ */ +struct snd_timer_uinfo { + /* To pretend being a normal timer, we need to know the resolution in ns. */ + __u64 resolution; + int fd; + unsigned int id; + unsigned char reserved[16]; +}; + #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) @@ -990,6 +1003,8 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) +#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo) +#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6) #if __BITS_PER_LONG == 64 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD -- cgit v1.2.3 From fda1dd3c54ef3c4200c2e77634a91610da973950 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:37 -0700 Subject: find: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline find_bit nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/find.h | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index 5dfca4225fef..68685714bc18 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. 
*/ -static inline +static __always_inline unsigned long find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) * Returns the bit number of the N'th set bit. * If no such, returns >= @size. */ -static inline +static __always_inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { if (n >= size) @@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) @@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1, * Returns the bit number for the first set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, @@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) * * Returns the bit number of the last set bit, or size. */ -static inline +static __always_inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. 
*/ -static inline +static __always_inline unsigned long find_next_and_bit_wrap(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing * before using it alone. */ -static inline +static __always_inline unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, unsigned long start, unsigned long n) { @@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump, #if defined(__LITTLE_ENDIAN) -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_zero_bit(addr, size, offset); } -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_bit(addr, size, offset); } -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) +static __always_inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { return find_first_zero_bit(addr, size); } @@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #elif defined(__BIG_ENDIAN) #ifndef find_next_zero_bit_le -static inline +static __always_inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned #endif #ifndef find_first_zero_bit_le -static inline +static __always_inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) #endif #ifndef find_next_bit_le -static inline +static __always_inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { -- cgit v1.2.3 From ed8cd2b3bd9f070120528fc5403a2d0b5afe07f8 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:38 -0700 Subject: bitmap: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline bitmap functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. 
]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/bitmap.h | 140 +++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01..0406f48f0a6a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map, * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ -static inline unsigned long -bitmap_find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) +static __always_inline +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); @@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) -static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) memset(dst, 0, len); } -static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) memset(dst, 0xff, len); } -static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, /* * Copy bitmap and clear tail bits in last word. 
*/ -static inline void bitmap_copy_clear_tail(unsigned long *dst, - const unsigned long *src, unsigned int nbits) +static __always_inline +void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) @@ -306,16 +306,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif -static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } -static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -323,8 +325,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } -static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -332,16 +335,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } -static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); @@ -356,8 +360,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -static inline bool bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -376,10 +380,9 @@ static inline bool bitmap_equal(const unsigned long *src1, * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ -static inline bool bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits) +static __always_inline +bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, + const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); @@ -387,9 +390,8 @@ static inline bool 
bitmap_or_equal(const unsigned long *src1, return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } -static inline bool bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, - unsigned int nbits) +static __always_inline +bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; @@ -397,8 +399,8 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -static inline bool bitmap_subset(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); @@ -406,7 +408,8 @@ static inline bool bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) +static __always_inline +bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -414,7 +417,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) +static __always_inline +bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); @@ -448,8 +452,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1, return __bitmap_weight_andnot(src1, src2, nbits); } -static __always_inline void bitmap_set(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); @@ -464,8 +468,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, __bitmap_set(map, start, nbits); } -static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); @@ -480,8 +484,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, __bitmap_clear(map, start, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; @@ -489,8 +494,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); @@ -498,11 +504,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const 
unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } -static inline void bitmap_replace(unsigned long *dst, - const unsigned long *old, - const unsigned long *new, - const unsigned long *mask, - unsigned int nbits) +static __always_inline +void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); @@ -545,8 +552,9 @@ static inline void bitmap_replace(unsigned long *dst, * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. * See bitmap_scatter() for details related to this relationship. */ -static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -599,8 +607,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, * bitmap_scatter(res, src, mask, n) and a call to * bitmap_scatter(res, result, mask, n) will lead to the same res value. */ -static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -611,9 +620,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, __assign_bit(n++, dst, test_bit(bit, src)); } -static inline void bitmap_next_set_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) +static __always_inline +void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, + unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); @@ -628,7 +637,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). */ -static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { bitmap_clear(bitmap, pos, BIT(order)); } @@ -644,7 +654,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos * Returns: 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { unsigned int len = BIT(order); @@ -668,7 +679,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos * Returns: the bit offset in bitmap of the allocated region, * or -errno on failure. 
*/ -static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +static __always_inline +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { unsigned int pos, end; /* scans bitmap by regions of size order */ @@ -722,7 +734,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ -static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask) { bitmap_from_arr64(dst, &mask, 64); } @@ -737,9 +749,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return * value is undefined. */ -static inline unsigned long bitmap_read(const unsigned long *map, - unsigned long start, - unsigned long nbits) +static __always_inline +unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits) { size_t index = BIT_WORD(start); unsigned long offset = start % BITS_PER_LONG; @@ -772,8 +783,9 @@ static inline unsigned long bitmap_read(const unsigned long *map, * * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. */ -static inline void bitmap_write(unsigned long *map, unsigned long value, - unsigned long start, unsigned long nbits) +static __always_inline +void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) { size_t index; unsigned long offset; -- cgit v1.2.3 From ab6b1010dab68f6d4bf063517db4ce2d63554bc6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 18 Jul 2024 17:50:39 -0700 Subject: cpumask: Switch from inline to __always_inline On recent (v6.6+) builds with Clang (based on Clang 18.0.0) and certain configurations [0], I'm finding that (lack of) inlining decisions may lead to section mismatch warnings like the following: WARNING: modpost: vmlinux.o: section mismatch in reference: cpumask_andnot (section: .text) -> cpuhp_bringup_cpus_parallel.tmp_mask (section: .init.data) ERROR: modpost: Section mismatches detected. or more confusingly: WARNING: modpost: vmlinux: section mismatch in reference: cpumask_andnot+0x5f (section: .text) -> efi_systab_phys (section: .init.data) The first warning makes a little sense, because cpuhp_bringup_cpus_parallel() (an __init function) calls cpumask_andnot() on tmp_mask (an __initdata symbol). If the compiler doesn't inline cpumask_andnot(), this may appear like a mismatch. The second warning makes less sense, but might be because efi_systab_phys and cpuhp_bringup_cpus_parallel.tmp_mask are laid out near each other, and the latter isn't a proper C symbol definition. In any case, it seems a reasonable solution to suggest more strongly to the compiler that these cpumask macros *must* be inlined, as 'inline' is just a recommendation. This change has been previously proposed in the past as: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But the change has been split up, to separately justify the cpumask changes (which drive my work) and the bitmap/const optimizations (that Yury separately proposed for other reasons). This ends up as somewhere between a "rebase" and "rewrite" -- I had to rewrite most of the patch. 
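As an aside, a minimal illustration of the pattern described above (not taken from this patch; the names below are hypothetical stand-ins for cpuhp_bringup_cpus_parallel() and its tmp_mask):

#include <linux/cpumask.h>
#include <linux/init.h>

static cpumask_t tmp_mask __initdata;		/* ends up in .init.data */

static int __init bringup_example(void)		/* ends up in .init.text */
{
	/*
	 * If the compiler emits a non-inlined (possibly specialized) copy of
	 * cpumask_andnot() into .text with &tmp_mask folded into it, modpost
	 * sees a .text -> .init.data reference and reports a section mismatch.
	 * Forcing __always_inline keeps the helper inside the __init caller.
	 */
	cpumask_andnot(&tmp_mask, cpu_possible_mask, cpu_online_mask);
	return 0;
}
early_initcall(bringup_example);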
According to bloat-o-meter, vmlinux decreases minimally in size (-0.00% to -0.01%, depending on the version of GCC or Clang and .config in question) with this series of changes: gcc 13.2.0, x86_64_defconfig -3005 bytes, Before=21944501, After=21941496, chg -0.01% clang 16.0.6, x86_64_defconfig -105 bytes, Before=22571692, After=22571587, chg -0.00% gcc 9.5.0, x86_64_defconfig -1771 bytes, Before=21557598, After=21555827, chg -0.01% clang 18.0_pre516547 (ChromiumOS toolchain), x86_64_defconfig -191 bytes, Before=22615339, After=22615148, chg -0.00% clang 18.0_pre516547 (ChromiumOS toolchain), based on ChromiumOS config + gcov -979 bytes, Before=76294783, After=76293804, chg -0.00% [0] CONFIG_HOTPLUG_PARALLEL=y ('select'ed for x86 as of [1]) and CONFIG_GCOV_PROFILE_ALL. [1] commit 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it") Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/cpumask.h | 212 +++++++++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113..9e3f3c96607d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -30,7 +30,7 @@ extern unsigned int nr_cpu_ids; #endif -static inline void set_nr_cpu_ids(unsigned int nr) +static __always_inline void set_nr_cpu_ids(unsigned int nr) { #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) WARN_ON(nr != nr_cpu_ids); @@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * * Return: >= nr_cpu_ids if no cpus set. */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if all cpus are set. */ -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ -static inline +static __always_inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); @@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * * Return: >= nr_cpu_ids if no cpus set in all. */ -static inline +static __always_inline unsigned int cpumask_first_and_and(const struct cpumask *srcp1, const struct cpumask *srcp2, const struct cpumask *srcp3) @@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1, * * Return: >= nr_cpumask_bits if no CPUs set. */ -static inline unsigned int cpumask_last(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus set. 
*/ -static inline +static __always_inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ @@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus unset. */ -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) @@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) #if NR_CPUS == 1 /* Uniprocessor: there is only one valid CPU */ -static inline unsigned int cpumask_local_spread(unsigned int i, int node) +static __always_inline +unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } -static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } -static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); * * Return: >= nr_cpu_ids if no further cpus set in both. */ -static inline +static __always_inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, - const struct cpumask *src2p) + const struct cpumask *src2p) { /* -1 is a legal arg here. */ if (n != -1) @@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 -static inline +static __always_inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); @@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * Often used to find any cpu but smp_processor_id() in a mask. * Return: >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; @@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * * Returns >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, unsigned int cpu) @@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } @@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. 
*/ -static inline +static __always_inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, * * Return: true if @cpu is set in @cpumask, else returns false */ -static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline +bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_setall(struct cpumask *dstp) +static __always_inline void cpumask_setall(struct cpumask *dstp) { if (small_const_nbits(small_cpumask_bits)) { cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); @@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp) * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_clear(struct cpumask *dstp) +static __always_inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } @@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp) * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_and(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, - const struct cpumask *src2p) +static 
__always_inline +void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_xor(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp, * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_andnot(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * * Return: true if the cpumasks are equal, false if not */ -static inline bool cpumask_equal(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * Return: true if first cpumask ORed with second cpumask == third cpumask, * otherwise false */ -static inline bool cpumask_or_equal(const struct cpumask *src1p, - const struct cpumask *src2p, - const struct cpumask *src3p) +static __always_inline +bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, + const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), small_cpumask_bits); @@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * Return: true if first cpumask ANDed with second cpumask is non-empty, * otherwise false */ -static inline bool cpumask_intersects(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * * Return: true if *@src1p is a subset of *@src2p, else returns false */ -static inline bool cpumask_subset(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, * * Return: true if srcp is empty (has no bits set), else false */ -static inline bool cpumask_empty(const struct cpumask *srcp) +static __always_inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } @@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp) * * Return: true if srcp is full (has all bits 
set), else false */ -static inline bool cpumask_full(const struct cpumask *srcp) +static __always_inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } @@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) * * Return: count of bits set in *srcp */ -static inline unsigned int cpumask_weight(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } @@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) { return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_right(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, small_cpumask_bits); @@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_left(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); @@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp, * @dstp: the result * @srcp: the input cpumask */ -static inline void cpumask_copy(struct cpumask *dstp, - const struct cpumask *srcp) +static __always_inline +void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } @@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } @@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. 
*/ -static inline int cpumask_parselist_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); @@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } @@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * * Return: -errno, or 0 for success. */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } @@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) * * Return: size to allocate for a &struct cpumask in bytes */ -static inline unsigned int cpumask_size(void) +static __always_inline unsigned int cpumask_size(void) { return bitmap_size(large_cpumask_bits); } @@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void) bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -static inline +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); @@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * * Return: %true if allocation succeeded, %false if not */ -static inline +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } -static inline +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var(mask, flags | __GFP_ZERO); @@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } @@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask) #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly -static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } -static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } -static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } -static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } -static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -static inline void free_cpumask_var(cpumask_var_t mask) +static 
__always_inline void free_cpumask_var(cpumask_var_t mask) { } -static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return true; } @@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online); ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) -static inline int __check_is_bitmap(const unsigned long *bitmap) +static __always_inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } @@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap) extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; @@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_online_mask); } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_enabled_mask); } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_present_mask); } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_active_mask); } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_dying_mask); } @@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu) #define num_present_cpus() 1U #define num_active_cpus() 1U -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return false; } @@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu) * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. 
*/ -static inline ssize_t +static __always_inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), @@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; @@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; -- cgit v1.2.3 From 54c9e0085bd1015e524d8f4d3c4e78a7bc77ffca Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 18 Jul 2024 17:50:40 -0700 Subject: nodemask: Switch from inline to __always_inline 'inline' keyword is only a recommendation for compiler. If it decides to not inline nodemask functions, the whole small_const_nbits() machinery doesn't work. This is how a standard GCC 11.3.0 does for my x86_64 build now. This patch replaces 'inline' directive with unconditional '__always_inline' to make sure that there's always a chance for compile-time optimization. It doesn't change size of kernel image, according to bloat-o-meter. [[ Brian: split out from: Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@gmail.com/ But rewritten, as there were too many conflicts. ]] Co-developed-by: Brian Norris Signed-off-by: Brian Norris Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Yury Norov --- include/linux/nodemask.h | 86 ++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b61438313a73..9fd7a0ce9c1a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_; */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) -static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } -static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? 
m->bits : NULL; } @@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp) } #define node_clear(node, dst) __node_clear((node), &(dst)) -static inline void __node_clear(int node, volatile nodemask_t *dstp) +static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) -static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) -static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } @@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline bool __node_test_and_set(int node, nodemask_t *addr) +static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) -static inline void __nodes_complement(nodemask_t *dstp, +static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); @@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_equal(const nodemask_t *src1p, 
+static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_intersects(const nodemask_t *src1p, +static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_subset(const nodemask_t *src1p, +static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) -static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) +static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_right(nodemask_t *dstp, +static __always_inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); @@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp, #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_left(nodemask_t *dstp, +static __always_inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); @@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline unsigned int __first_node(const nodemask_t *srcp) +static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline unsigned int __next_node(int n, const nodemask_t *srcp) +static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } @@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. 
*/ #define next_node_in(n, src) __next_node_in((n), &(src)) -static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) +static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); @@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) return ret; } -static inline void init_nodemask_of_node(nodemask_t *mask, int node) +static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); @@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline unsigned int __first_unset_node(const nodemask_t *maskp) +static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); @@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse_user(const char __user *buf, int len, +static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) -static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) +static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) -static inline int __node_remap(int oldbit, +static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); @@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit, #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) -static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, +static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); @@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) -static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); @@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) -static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); @@ -418,22 +418,22 @@ enum node_states { extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 -static inline int node_state(int node, enum node_states state) +static __always_inline int 
node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } @@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline unsigned int next_online_node(int nid) +static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline unsigned int next_memory_node(int nid) +static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } @@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid) extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; -static inline void node_set_online(int nid) +static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } -static inline void node_set_offline(int nid) +static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); @@ -469,20 +469,20 @@ static inline void node_set_offline(int nid) #else -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node == 0; } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return 1; } @@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state) #endif -static inline int node_random(const nodemask_t *maskp) +static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; -- cgit v1.2.3 From 92a10d3861491d09e73b35db7113dd049659f588 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 30 Jul 2024 22:15:16 +0200 Subject: runtime constants: move list of constants to vmlinux.lds.h Refactor the list of constant variables into a macro. This should make it easier to add more constants in the future. 
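As an illustration of the intended use (a sketch, not the literal arch-side diffs in this series): an architecture's vmlinux.lds.S that previously listed each runtime constant by hand can instead reference the single shared macro added in the hunk below.

	/* before: every runtime constant spelled out in each arch linker script */
	RUNTIME_CONST(shift, d_hash_shift)
	RUNTIME_CONST(ptr, dentry_hashtable)

	/* after: one shared list, maintained in asm-generic/vmlinux.lds.h */
	RUNTIME_CONST_VARIABLES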
Signed-off-by: Jann Horn Reviewed-by: Alexander Gordeev Reviewed-by: Thomas Gleixner Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Arnd Bergmann --- include/asm-generic/vmlinux.lds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1ae44793132a..2157d8802351 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -918,6 +918,10 @@ #define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) +#define RUNTIME_CONST_VARIABLES \ + RUNTIME_CONST(shift, d_hash_shift) \ + RUNTIME_CONST(ptr, dentry_hashtable) + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \ -- cgit v1.2.3 From b27404b2bbf951a11500525dea15681cc970226c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:46 +0800 Subject: ALSA/ASoC/SoundWire: Intel: use single definition for SDW_INTEL_MAX_LINKS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definitions are currently duplicated in intel-sdw-acpi.c and sof_sdw.c. Move the definition to the sdw_intel.h header, and change the prefix to make it Intel-specific. No functionality change in this patch. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index d537587b4499..87d82ea9a13a 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -447,4 +447,9 @@ extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; #define SDW_INTEL_DEV_NUM_IDA_MIN 6 +/* + * Max number of links supported in hardware + */ +#define SDW_INTEL_MAX_LINKS 4 + #endif -- cgit v1.2.3 From d2234596be2192d9c1ae34f8c7534531191bc433 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:47 +0800 Subject: soundwire: intel: add probe-time check on link id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In older platforms, the number of links was constant and hard-coded to 4. Newer platforms can have varying number of links, so we need to add a probe-time check to make sure the ACPI-reported information with _DSD properties is aligned with hardware capabilities reported in the SoundWire LCAP register. Acked-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Acked-by: Mark Brown Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 87d82ea9a13a..cb8e7396b4db 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -388,6 +388,7 @@ struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
* @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities + * @get_link_count: fetch link count from hardware registers * @register_dai: read all PDI information and register DAIs * @check_clock_stop: throw error message if clock is not stopped. * @start_bus: normal start @@ -412,6 +413,8 @@ struct sdw_intel_hw_ops { void (*debugfs_init)(struct sdw_intel *sdw); void (*debugfs_exit)(struct sdw_intel *sdw); + int (*get_link_count)(struct sdw_intel *sdw); + int (*register_dai)(struct sdw_intel *sdw); void (*check_clock_stop)(struct sdw_intel *sdw); -- cgit v1.2.3 From 1f3662838a05f1ab6af89a417f6f252d91d0806b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Aug 2024 08:55:48 +0800 Subject: soundwire: intel: increase maximum number of links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel platforms have enabled 4 links since the beginning, newer platforms now have 5 links. Update the definition accordingly. This patch will have no effect on older platforms where the number of links was hard-coded. A follow-up patch will add a dynamic check that the ACPI-reported information is aligned with hardware capabilities on newer platforms. Acked-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Acked-by: Mark Brown Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/20240819005548.5867-4-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index cb8e7396b4db..37ae69365fe2 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -453,6 +453,6 @@ extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; /* * Max number of links supported in hardware */ -#define SDW_INTEL_MAX_LINKS 4 +#define SDW_INTEL_MAX_LINKS 5 #endif -- cgit v1.2.3 From 820a185896b77814557302b981b092a9e7b36814 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Jul 2024 15:15:35 +0200 Subject: fcntl: add F_CREATED_QUERY Systemd has a helper called openat_report_new() that returns whether a file was created anew or it already existed before for cases where O_CREAT has to be used without O_EXCL (cf. [1]). That apparently isn't something that's specific to systemd but it's where I noticed it. The current logic is that it first attempts to open the file without O_CREAT | O_EXCL and if it gets ENOENT the helper tries again with both flags. If that succeeds all is well. If it now reports EEXIST it retries. That works fairly well but some corner cases make this more involved. If this operates on a dangling symlink the first openat() without O_CREAT | O_EXCL will return ENOENT but the second openat() with O_CREAT | O_EXCL will fail with EEXIST. The reason is that openat() without O_CREAT | O_EXCL follows the symlink while O_CREAT | O_EXCL doesn't for security reasons. So it's not something we can really change unless we add an explicit opt-in via O_FOLLOW which seems really ugly. The caller could try and use fanotify() to register to listen for creation events in the directory before calling openat(). The caller could then compare the returned tid to its own tid to ensure that even in threaded environments it actually created the file. 
That might work but is a lot of work for something that should be fairly
simple and I'm uncertain about its reliability.

The caller could use a bpf lsm hook to hook into security_file_open() to
figure out whether they created the file. That also seems a bit wild.

So let's add F_CREATED_QUERY which allows the caller to check whether they
actually did create the file. That has caveats of course but I don't think
they are problematic:

* In multi-threaded environments a thread can only be sure that it did
  create the file if it calls openat() with O_CREAT. In other words, it's
  obviously not enough to just go through its fdtable and check these fds
  because another thread could've created the file.

* If there are any codepaths where an openat() with O_CREAT would yield the
  same struct file as that of another thread it would obviously cause wrong
  results. I'm not aware of any such codepaths from openat() itself. Imho,
  that would be a bug.

* Related to the previous point, calling the new fcntl() on files created
  and opened via special-purpose system calls or ioctl()s would cause wrong
  results only if the affected subsystem a) raises FMODE_CREATED and b) may
  return the same struct file for two different calls. I'm not seeing
  anything outside of regular VFS code that raises FMODE_CREATED. There is
  code for b) in e.g., the drm layer where the same struct file is
  resurfaced but again FMODE_CREATED isn't used and it would be very
  misleading if it did.

Link: https://github.com/systemd/systemd/blob/11d5e2b5fbf9f6bfa5763fd45b56829ad4f0777f/src/basic/fs-util.c#L1078 [1]
Link: https://lore.kernel.org/r/20240724-work-fcntl-v1-1-e8153a2f1991@kernel.org
Reviewed-by: Jeff Layton
Reviewed-by: Jan Kara
Signed-off-by: Christian Brauner
---
 include/uapi/linux/fcntl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index c0bcc185fa48..e55a3314bcb0 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -16,6 +16,9 @@
 
 #define F_DUPFD_QUERY	(F_LINUX_SPECIFIC_BASE + 3)
 
+/* Was the file just created? */
+#define F_CREATED_QUERY	(F_LINUX_SPECIFIC_BASE + 4)
+
 /*
  * Cancel a blocking posix lock; internal use only until we expose an
  * asynchronous lock api to userspace:
-- cgit v1.2.3

From c393eaa85349941badf3ce5f087400dbaf3bbe02 Mon Sep 17 00:00:00 2001
From: Haifeng Xu
Date: Fri, 26 Jul 2024 11:05:25 +0800
Subject: fs: don't flush in-flight wb switches for superblocks without cgroup writeback

When deactivating any type of superblock, it had to wait for the
in-flight wb switches to be completed. wb switches are executed in
inode_switch_wbs_work_fn() which needs to acquire the wb_switch_rwsem
and races against sync_inodes_sb(). If there is too much dirty data in
the superblock, the waiting time may increase significantly.

Superblocks without cgroup writeback, such as tmpfs, have nothing to do
with the wb switches, so the flushing can be avoided for them.
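A minimal sketch of the idea on the implementation side (fs/fs-writeback.c); the exact check in the real patch may differ, but SB_I_CGROUPWB is the existing per-superblock opt-in flag for cgroup writeback:

#include <linux/fs.h>
#include <linux/writeback.h>

void cgroup_writeback_umount(struct super_block *sb)
{
	/*
	 * Superblocks that never opt in to cgroup writeback (e.g. tmpfs)
	 * have no inode_switch_wbs work in flight, so there is nothing
	 * to wait for here.
	 */
	if (!(sb->s_iflags & SB_I_CGROUPWB))
		return;

	/* ... existing flush of in-flight wb switches ... */
}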
Signed-off-by: Haifeng Xu Link: https://lore.kernel.org/r/20240726030525.180330-1-haifeng.xu@shopee.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1a54676d843a..56b85841ae4c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -217,7 +217,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); -void cgroup_writeback_umount(void); +void cgroup_writeback_umount(struct super_block *sb); bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** @@ -324,7 +324,7 @@ static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, { } -static inline void cgroup_writeback_umount(void) +static inline void cgroup_writeback_umount(struct super_block *sb) { } -- cgit v1.2.3 From c01a5d89e5c8abe638107be2a4ea9e4c7fcdd7f6 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Fri, 2 Aug 2024 17:19:01 +0800 Subject: percpu-rwsem: remove the unused parameter 'read' In the function percpu_rwsem_release, the parameter `read` is unused, so remove it. Signed-off-by: Wang Long Link: https://lore.kernel.org/r/20240802091901.2546797-1-w@laoqinren.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- include/linux/percpu-rwsem.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb0426f349fc..c0286d479c9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1683,7 +1683,7 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ - percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) + percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 36b942b67b7d..c012df33a9f0 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -145,7 +145,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) + unsigned long ip) { lock_release(&sem->dep_map, ip); } -- cgit v1.2.3 From cd8e468efb4fb2742e06328a75b282c35c1abf8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:57 +0200 Subject: ACPI: video: Add Dell UART backlight controller detection Dell All In One (AIO) models released after 2017 use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Commit 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") has added support for this, but I neglected to tie this into acpi_video_get_backlight_type(). Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Add support to acpi_video_get_backlight_type() for a new dell_uart backlight type. 
So that the existing infra to override the backlight control method on the commandline or with DMI quirks can be used. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-2-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- include/acpi/video.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/video.h b/include/acpi/video.h index 3d538d4178ab..044c463138df 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -50,6 +50,7 @@ enum acpi_backlight_type { acpi_backlight_native, acpi_backlight_nvidia_wmi_ec, acpi_backlight_apple_gmux, + acpi_backlight_dell_uart, }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) -- cgit v1.2.3 From ad614a706b1ac83b95b333f44b8f5e70bcb37dc5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi (cherry picked from commit f2881dfdaaa9ec873dbd383ef5512fc31e576cbb) Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..db232a25189e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.2.3 From 81475beb1b5996505a39cd1d9316ce1e668932a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:28 +0000 Subject: block: Drop NULL check in bdev_write_zeroes_sectors() Function bdev_get_queue() must not return NULL, so drop the check in bdev_write_zeroes_sectors(). Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. 
Petersen Reviewed-by: Nitesh Shetty Link: https://lore.kernel.org/r/20240815163228.216051-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..b7664d593486 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1296,12 +1296,7 @@ bdev_max_secure_erase_sectors(struct block_device *bdev) static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) -- cgit v1.2.3 From decbfaf06db05fa1f9b33149ebb3c145b44e878f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:53 +0200 Subject: drm/ttm: Add a flag to allow drivers to skip clear-on-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before releasing backing stores if they want to skip clear-on-free. Cc: Matthew Auld Cc: Thomas Hellström Suggested-by: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- include/drm/ttm/ttm_tt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..cfaf49de2419 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,6 +85,9 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * + * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles + * clearing backing store + * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -94,8 +97,9 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) +#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.2.3 From dc0112e6d8b42b39f9d283bab489a757e9d284f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:59 -0400 Subject: rpcrdma: Trace connection registration and unregistration These new trace points record xarray indices and the time of endpoint registration and unregistration, to co-ordinate with device removal events. 
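To make the intended use concrete, a hypothetical call site could pair the new trace points with the xarray bookkeeping they describe. Everything below except trace_rpcrdma_client_register() and the rn->rn_index field is made up for illustration (the function name, the xarray, and the assumption that rn_index is the u32 slot index):

/* Hypothetical sketch: emit the trace point once the notification has
 * been stored at rn->rn_index; a matching
 * trace_rpcrdma_client_unregister() call would go just before the
 * entry is erased again. */
static int rpcrdma_client_add(struct xarray *clients,
			      struct ib_device *device,
			      struct rpcrdma_notification *rn)
{
	int ret;

	ret = xa_alloc(clients, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL);
	if (ret)
		return ret;

	trace_rpcrdma_client_register(device, rn);
	return 0;
}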
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba2d6a0e41cc..a96a985c49b3 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2277,6 +2277,42 @@ DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); +DECLARE_EVENT_CLASS(rpcrdma_client_register_class, + TP_PROTO( + const struct ib_device *device, + const struct rpcrdma_notification *rn + ), + + TP_ARGS(device, rn), + + TP_STRUCT__entry( + __string(name, device->name) + __field(void *, callback) + __field(u32, index) + ), + + TP_fast_assign( + __assign_str(name); + __entry->callback = rn->rn_done; + __entry->index = rn->rn_index; + ), + + TP_printk("device=%s index=%u done callback=%pS\n", + __get_str(name), __entry->index, __entry->callback + ) +); + +#define DEFINE_CLIENT_REGISTER_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_register_class, name, \ + TP_PROTO( \ + const struct ib_device *device, \ + const struct rpcrdma_notification *rn \ + ), \ + TP_ARGS(device, rn)) + +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register); +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister); + #endif /* _TRACE_RPCRDMA_H */ #include -- cgit v1.2.3 From c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Jul 2024 10:58:29 +0200 Subject: netfilter: nf_tables: store new sets in dedicated list nft_set_lookup_byid() is very slow when transaction becomes large, due to walk of the transaction list. Add a dedicated list that contains only the new sets. Before: nft -f ruleset 0.07s user 0.00s system 0% cpu 1:04.84 total After: nft -f ruleset 0.07s user 0.00s system 0% cpu 30.115 total .. where ruleset contains ~10 sets with ~100k elements. The above number is for a combined flush+reload of the ruleset. With previous flush, even the first NEWELEM has to walk through a few hundred thousands of DELSET(ELEM) transactions before the first NEWSET object. To cope with random-order-newset-newsetelem we'd need to replace commit_set_list with a hashtable. Expectation is that a NEWELEM operation refers to the most recently added set, so last entry of the dedicated list should be the set we want. NB: This is not a bug fix per se (functionality is fine), but with larger transaction batches list search takes forever, so it would be nice to speed this up for -stable too, hence adding a "fixes" tag. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Reported-by: Nadia Pinaeva Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1bfdd16890fa..2be4738eae1c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1674,6 +1674,7 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_trans_binding nft_trans_binding; + struct list_head list_trans_newset; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1875,6 +1876,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) 
{ re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; -- cgit v1.2.3 From d5283b47e225e1473e1a07085b9c4e6bfd08ba51 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 17 Jul 2024 22:09:44 -0400 Subject: netfilter: move nf_ct_netns_get out of nf_conncount_init This patch is to move nf_ct_netns_get() out of nf_conncount_init() and let the consumers of nf_conncount decide if they want to turn on netfilter conntrack. It makes nf_conncount more flexible to be used in other places and avoids netfilter conntrack turned on when using it in openvswitch conntrack. Signed-off-by: Xin Long Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index e227d997fc71..1b58b5b91ff6 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -15,10 +15,8 @@ struct nf_conncount_list { unsigned int count; /* length of list */ }; -struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, - unsigned int keylen); -void nf_conncount_destroy(struct net *net, unsigned int family, - struct nf_conncount_data *data); +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); +void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, -- cgit v1.2.3 From 2865baf54077aa98fcdb478cefe6a42c417b9374 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Apr 2024 20:04:58 -0700 Subject: x86: support user address masking instead of non-speculative conditional The Spectre-v1 mitigations made "access_ok()" much more expensive, since it has to serialize execution with the test for a valid user address. All the normal user copy routines avoid this by just masking the user address with a data-dependent mask instead, but the fast "unsafe_user_read()" kind of patterms that were supposed to be a fast case got slowed down. This introduces a notion of using src = masked_user_access_begin(src); to do the user address sanity using a data-dependent mask instead of the more traditional conditional if (user_read_access_begin(src, len)) { model. This model only works for dense accesses that start at 'src' and on architectures that have a guard region that is guaranteed to fault in between the user space and the kernel space area. With this, the user access doesn't need to be manually checked, because a bad address is guaranteed to fault (by some architecture masking trick: on x86-64 this involves just turning an invalid user address into all ones, since we don't map the top of address space). This only converts a couple of examples for now. Example x86-64 code generation for loading two words from user space: stac mov %rax,%rcx sar $0x3f,%rcx or %rax,%rcx mov (%rcx),%r13 mov 0x8(%rcx),%r14 clac where all the error handling and -EFAULT is now purely handled out of line by the exception path. Of course, if the micro-architecture does badly at 'clac' and 'stac', the above is still pitifully slow. But at least we did as well as we could. 
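A sketch of the conversion pattern described above, reading a single word from user space. The surrounding function is illustrative only; can_do_masked_user_access(), masked_user_access_begin() and the unsafe_get_user()/user_read_access_end() pairing follow the pattern given in the message:

/* Illustrative only: use the masked-address fast path when the
 * architecture provides it, else fall back to the traditional
 * conditional user_read_access_begin(). */
static int read_user_word(const unsigned long __user *from, unsigned long *val)
{
	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;

	unsafe_get_user(*val, from, Efault);
	user_read_access_end();
	return 0;

Efault:
	user_read_access_end();
	return -EFAULT;
}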
Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f4832..f18371f6cf36 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -32,6 +32,13 @@ }) #endif +#ifdef masked_user_access_begin + #define can_do_masked_user_access() 1 +#else + #define can_do_masked_user_access() 0 + #define masked_user_access_begin(src) NULL +#endif + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and -- cgit v1.2.3 From 8dffaec34dd55473adcbc924a4c9b04aaa0d4278 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Aug 2024 12:18:23 -1000 Subject: workqueue: Fix htmldocs build warning Fix htmldocs build warning introduced by ec0a7d44b358 ("workqueue: Add interface for user-defined workqueue lockdep map"). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Matthew Brost --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8ccbf510880b..04dff07a9e2b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map - * @args: args for @fmt + * @...: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a -- cgit v1.2.3 From 7f6287417baf57754f47687c6ea1a749a0686ab0 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 19 Aug 2024 18:28:05 +0200 Subject: bpf: Allow bpf_current_task_under_cgroup() with BPF_CGROUP_* The helper bpf_current_task_under_cgroup() currently is only allowed for tracing programs, allow its usage also in the BPF_CGROUP_* program types. Move the code from kernel/trace/bpf_trace.c to kernel/bpf/helpers.c, so it compiles also without CONFIG_BPF_EVENTS. This will be used in systemd-networkd to monitor the sysctl writes, and filter it's own writes from others: https://github.com/systemd/systemd/pull/32212 Signed-off-by: Matteo Croce Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240819162805.78235-3-technoboy85@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9425e410bcb..f0192c173ed8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3206,6 +3206,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; -- cgit v1.2.3 From 807067bf014d4a3ae2cc55bd3de16f22a01eb580 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Aug 2024 15:04:37 -0700 Subject: kcm: Serialise kcm_sendmsg() for the same socket. 
syzkaller reported UAF in kcm_release(). [0] The scenario is 1. Thread A builds a skb with MSG_MORE and sets kcm->seq_skb. 2. Thread A resumes building skb from kcm->seq_skb but is blocked by sk_stream_wait_memory() 3. Thread B calls sendmsg() concurrently, finishes building kcm->seq_skb and puts the skb to the write queue 4. Thread A faces an error and finally frees skb that is already in the write queue 5. kcm_release() does double-free the skb in the write queue When a thread is building a MSG_MORE skb, another thread must not touch it. Let's add a per-sk mutex and serialise kcm_sendmsg(). [0]: BUG: KASAN: slab-use-after-free in __skb_unlink include/linux/skbuff.h:2366 [inline] BUG: KASAN: slab-use-after-free in __skb_dequeue include/linux/skbuff.h:2385 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge include/linux/skbuff.h:3181 [inline] BUG: KASAN: slab-use-after-free in kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 Read of size 8 at addr ffff0000ced0fc80 by task syz-executor329/6167 CPU: 1 PID: 6167 Comm: syz-executor329 Tainted: G B 6.8.0-rc5-syzkaller-g9abbc24128bc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:291 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:298 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x178/0x518 mm/kasan/report.c:488 kasan_report+0xd8/0x138 mm/kasan/report.c:601 __asan_report_load8_noabort+0x20/0x2c mm/kasan/report_generic.c:381 __skb_unlink include/linux/skbuff.h:2366 [inline] __skb_dequeue include/linux/skbuff.h:2385 [inline] __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] __skb_queue_purge include/linux/skbuff.h:3181 [inline] kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Allocated by task 6166: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x70/0x84 mm/kasan/generic.c:626 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x74/0x8c mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x204/0x4c0 mm/slub.c:3903 __alloc_skb+0x19c/0x3d8 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1296 [inline] kcm_sendmsg+0x1d3c/0x2124 net/kcm/kcmsock.c:783 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x220/0x2c0 net/socket.c:768 
splice_to_socket+0x7cc/0xd58 fs/splice.c:889 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0xec/0x1d8 fs/splice.c:1164 splice_direct_to_actor+0x438/0xa0c fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x1e4/0x304 fs/splice.c:1233 do_sendfile+0x460/0xb3c fs/read_write.c:1295 __do_sys_sendfile64 fs/read_write.c:1362 [inline] __se_sys_sendfile64 fs/read_write.c:1348 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1348 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Freed by task 6167: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x5c/0x74 mm/kasan/generic.c:640 poison_slab_object+0x124/0x18c mm/kasan/common.c:241 __kasan_slab_free+0x3c/0x78 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x15c/0x3d4 mm/slub.c:4363 kfree_skbmem+0x10c/0x19c __kfree_skb net/core/skbuff.c:1109 [inline] kfree_skb_reason+0x240/0x6f4 net/core/skbuff.c:1144 kfree_skb include/linux/skbuff.h:1244 [inline] kcm_release+0x104/0x4c8 net/kcm/kcmsock.c:1685 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 The buggy address belongs to the object at ffff0000ced0fc80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 0 bytes inside of freed 240-byte region [ffff0000ced0fc80, ffff0000ced0fd70) The buggy address belongs to the physical page: page:00000000d35f4ae4 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10ed0f flags: 0x5ffc00000000800(slab|node=0|zone=2|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 05ffc00000000800 ffff0000c1cbf640 fffffdffc3423100 dead000000000004 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff0000ced0fb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff0000ced0fc00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff0000ced0fc80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff0000ced0fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff0000ced0fd80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Closes: 
https://syzkaller.appspot.com/bug?extid=b72d86aa5df17ce74c60 Tested-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240815220437.69511-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5..441e993be634 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ -- cgit v1.2.3 From 0311507792b54069ac72e0a6c6b35c5d40aadad8 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:15 -0700 Subject: lsm: add IPE lsm Integrity Policy Enforcement (IPE) is an LSM that provides an complimentary approach to Mandatory Access Control than existing LSMs today. Existing LSMs have centered around the concept of access to a resource should be controlled by the current user's credentials. IPE's approach, is that access to a resource should be controlled by the system's trust of a current resource. The basis of this approach is defining a global policy to specify which resource can be trusted. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index 33d8c9f4aa6b..938593dfd5da 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -64,6 +64,7 @@ struct lsm_ctx { #define LSM_ID_LANDLOCK 110 #define LSM_ID_IMA 111 #define LSM_ID_EVM 112 +#define LSM_ID_IPE 113 /* * LSM_ATTR_XXX definitions identify different LSM attributes -- cgit v1.2.3 From 6ad8bc92a47702f0b5d0b96b680199910b89f688 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 15 Aug 2024 13:21:14 -0400 Subject: net: add copy from skb_seq_state to buffer function Add an skb helper function to copy a range of bytes from within an existing skb_seq_state. 
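A hedged usage sketch of the new helper (hypothetical caller; it assumes skb_copy_seq_read() returns 0 on success and a negative errno on a short read, which the declaration alone does not spell out):

/* Illustrative only: pull a fixed-size header at 'offset' into a local
 * buffer without linearising the skb. */
static int peek_header(struct sk_buff *skb, unsigned int offset,
		       void *hdr, unsigned int hdrlen)
{
	struct skb_seq_state st;
	int err;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	err = skb_copy_seq_read(&st, offset, hdr, hdrlen);
	skb_abort_seq_read(&st);

	return err;
}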
Signed-off-by: Christian Hopps Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..a871533b8568 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1433,6 +1433,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); -- cgit v1.2.3 From 77c5e7b623032502ee49fe7e7868eaca6786d7ed Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Wed, 14 Aug 2024 18:26:41 -0400 Subject: dt-bindings: power: Add support for RK3576 SoC Define power domain IDs as described in the TRM and add compatible for rockchip,rk3576-power-controller Signed-off-by: Finley Xiao Co-Developed-by: Detlev Casanova Signed-off-by: Detlev Casanova Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240814222824.3170-2-detlev.casanova@collabora.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/rockchip,rk3576-power.h | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/dt-bindings/power/rockchip,rk3576-power.h (limited to 'include') diff --git a/include/dt-bindings/power/rockchip,rk3576-power.h b/include/dt-bindings/power/rockchip,rk3576-power.h new file mode 100644 index 000000000000..324a056aa851 --- /dev/null +++ b/include/dt-bindings/power/rockchip,rk3576-power.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +#ifndef __DT_BINDINGS_POWER_RK3576_POWER_H__ +#define __DT_BINDINGS_POWER_RK3576_POWER_H__ + +/* VD_NPU */ +#define RK3576_PD_NPU 0 +#define RK3576_PD_NPUTOP 1 +#define RK3576_PD_NPU0 2 +#define RK3576_PD_NPU1 3 + +/* VD_GPU */ +#define RK3576_PD_GPU 4 + +/* VD_LOGIC */ +#define RK3576_PD_NVM 5 +#define RK3576_PD_SDGMAC 6 +#define RK3576_PD_USB 7 +#define RK3576_PD_PHP 8 +#define RK3576_PD_SUBPHP 9 +#define RK3576_PD_AUDIO 10 +#define RK3576_PD_VEPU0 11 +#define RK3576_PD_VEPU1 12 +#define RK3576_PD_VPU 13 +#define RK3576_PD_VDEC 14 +#define RK3576_PD_VI 15 +#define RK3576_PD_VO0 16 +#define RK3576_PD_VO1 17 +#define RK3576_PD_VOP 18 + +#endif -- cgit v1.2.3 From b6cee6544d01b8ac01232d343b1b2b594d94d61c Mon Sep 17 00:00:00 2001 From: Dikshita Agarwal Date: Mon, 19 Aug 2024 15:59:09 +0530 Subject: PM: domains: add device managed version of dev_pm_domain_attach|detach_list() Add the devres-enabled version of dev_pm_domain_attach|detach_list. If client drivers use devm_pm_domain_attach_list() to attach the PM domains, devm_pm_domain_detach_list() will be invoked implicitly during remove phase. 
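A sketch of how a consumer driver might use the devres variant; the driver, probe function and domain names are made up, only devm_pm_domain_attach_list() and struct dev_pm_domain_attach_data come from the API:

/* Illustrative probe: attach two power domains and rely on devres to
 * run dev_pm_domain_detach_list() automatically on driver removal. */
static int foo_probe(struct platform_device *pdev)
{
	static const char * const pd_names[] = { "core", "vcodec" };
	const struct dev_pm_domain_attach_data pd_data = {
		.pd_names = pd_names,
		.num_pd_names = ARRAY_SIZE(pd_names),
	};
	struct dev_pm_domain_list *pd_list;
	int ret;

	ret = devm_pm_domain_attach_list(&pdev->dev, &pd_data, &pd_list);
	if (ret < 0)
		return ret;

	/* no explicit detach needed in remove() or on error paths */
	return 0;
}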
Signed-off-by: Dikshita Agarwal Link: https://lore.kernel.org/r/1724063350-11993-2-git-send-email-quic_dikshita@quicinc.com Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b86bb52858ac..b637ec14025f 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -476,6 +476,9 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, int dev_pm_domain_attach_list(struct device *dev, const struct dev_pm_domain_attach_data *data, struct dev_pm_domain_list **list); +int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); @@ -502,6 +505,14 @@ static inline int dev_pm_domain_attach_list(struct device *dev, { return 0; } + +static inline int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} + static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) -- cgit v1.2.3 From d386d59b7c1a39112ca875327339ed519df2b96c Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 14 Aug 2024 21:08:26 +0800 Subject: net/smc: introduce statistics for allocated ringbufs of link group Currently we have the statistics on sndbuf/RMB sizes of all connections that have ever been on the link group, namely smc_stats_memsize. However these statistics are incremental and since the ringbufs of link group are allowed to be reused, we cannot know the actual allocated buffers through these. So here introduces the statistic on actual allocated ringbufs of the link group, it will be incremented when a new ringbuf is added into buf_list and decremented when it is deleted from buf_list. Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- include/uapi/linux/smc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index b531e3ef011a..9b74ef79070a 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -127,6 +127,8 @@ enum { SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ SMC_NLA_LGR_R_PAD, /* flag */ SMC_NLA_LGR_R_BUF_TYPE, /* u8 */ + SMC_NLA_LGR_R_SNDBUF_ALLOC, /* uint */ + SMC_NLA_LGR_R_RMB_ALLOC, /* uint */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; @@ -162,6 +164,8 @@ enum { SMC_NLA_LGR_D_V2_COMMON, /* nest */ SMC_NLA_LGR_D_EXT_GID, /* u64 */ SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */ + SMC_NLA_LGR_D_SNDBUF_ALLOC, /* uint */ + SMC_NLA_LGR_D_DMB_ALLOC, /* uint */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; -- cgit v1.2.3 From e0d103542b06c36240e3887edfe49578464866eb Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Wed, 14 Aug 2024 21:08:27 +0800 Subject: net/smc: introduce statistics for ringbufs usage of net namespace The buffer size histograms in smc_stats, namely rx/tx_rmbsize, record the sizes of ringbufs for all connections that have ever appeared in the net namespace. They are incremental and we cannot know the actual ringbufs usage from these. 
So here introduces statistics for current ringbufs usage of existing smc connections in the net namespace into smc_stats, it will be incremented when new connection uses a ringbuf and decremented when the ringbuf is unused. Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- include/uapi/linux/smc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 9b74ef79070a..1f58cb0c266b 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -253,6 +253,8 @@ enum { SMC_NLA_STATS_T_TX_BYTES, /* u64 */ SMC_NLA_STATS_T_RX_CNT, /* u64 */ SMC_NLA_STATS_T_TX_CNT, /* u64 */ + SMC_NLA_STATS_T_RX_RMB_USAGE, /* uint */ + SMC_NLA_STATS_T_TX_RMB_USAGE, /* uint */ __SMC_NLA_STATS_T_MAX, SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 }; -- cgit v1.2.3 From 7ea0522ef81a335c2d3a0ab1c8a4fab9a23c4a03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:12 +0200 Subject: netfilter: nf_tables: pass context structure to nft_parse_register_load Mechanical transformation, no logical changes intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2be4738eae1c..573995e7a27e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -254,7 +254,8 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_load(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *sreg, u32 len); int nft_parse_register_store(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *dreg, const struct nft_data *data, -- cgit v1.2.3 From 14fb07130c7ddd257e30079b87499b3f89097b09 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:13 +0200 Subject: netfilter: nf_tables: allow loads only when register is initialized Reject rules where a load occurs from a register that has not seen a store early in the same rule. commit 4c905f6740a3 ("netfilter: nf_tables: initialize registers in nft_do_chain()") had to add a unconditional memset to the nftables register space to avoid leaking stack information to userspace. This memset shows up in benchmarks. After this change, this commit can be reverted again. Note that this breaks userspace compatibility, because theoretically you can do rule 1: reg2 := meta load iif, reg2 == 1 jump ... rule 2: reg2 == 2 jump ... // read access with no store in this rule ... after this change this is rejected. Neither nftables nor iptables-nft generate such rules, each rule is always standalone. This resuts in a small increase of nft_ctx structure by sizeof(long). To cope with hypothetical rulesets like the example above one could emit on-demand "reg[x] = 0" store when generating the datapath blob in nf_tables_commit_chain_prepare(). A patch that does this is linked to below. For now, lets disable this. In nf_tables, a rule is the smallest unit that can be replaced from userspace, i.e. a hypothetical ruleset that relies on earlier initialisations of registers can't be changed at will as register usage would need to be coordinated. 
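A simplified sketch of the kind of check the new reg_inited bitmap enables inside nft_parse_register_load(); the exact implementation may differ, this is only an assumption drawn from the description above:

/* Reject a load from any 32-bit register slot that no earlier
 * expression in the same rule has stored to. */
static int nft_check_reg_inited(const struct nft_ctx *ctx, u32 reg, u32 len)
{
	u32 i, nregs = DIV_ROUND_UP(len, NFT_REG32_SIZE);

	for (i = 0; i < nregs; i++) {
		if (!test_bit(reg + i, ctx->reg_inited))
			return -ENODATA;
	}
	return 0;
}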
Link: https://lore.kernel.org/netfilter-devel/20240627135330.17039-4-fw@strlen.de/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 573995e7a27e..1cc33d946d41 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -221,6 +221,7 @@ struct nft_ctx { u8 family; u8 level; bool report; + DECLARE_BITMAP(reg_inited, NFT_REG32_NUM); }; enum nft_data_desc_flags { -- cgit v1.2.3 From ea72ce5da22806d5713f3ffb39a6d5ae73841f93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Aug 2024 00:29:36 +0200 Subject: x86/kaslr: Expose and use the end of the physical memory address space iounmap() on x86 occasionally fails to unmap because the provided valid ioremap address is not below high_memory. It turned out that this happens due to KASLR. KASLR uses the full address space between PAGE_OFFSET and vaddr_end to randomize the starting points of the direct map, vmalloc and vmemmap regions. It thereby limits the size of the direct map by using the installed memory size plus an extra configurable margin for hot-plug memory. This limitation is done to gain more randomization space because otherwise only the holes between the direct map, vmalloc, vmemmap and vaddr_end would be usable for randomizing. The limited direct map size is not exposed to the rest of the kernel, so the memory hot-plug and resource management related code paths still operate under the assumption that the available address space can be determined with MAX_PHYSMEM_BITS. request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 downwards. That means the first allocation happens past the end of the direct map and if unlucky this address is in the vmalloc space, which causes high_memory to become greater than VMALLOC_START and consequently causes iounmap() to fail for valid ioremap addresses. MAX_PHYSMEM_BITS cannot be changed for that because the randomization does not align with address bit boundaries and there are other places which actually require to know the maximum number of address bits. All remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found to be correct. Cure this by exposing the end of the direct map via PHYSMEM_END and use that for the memory hot-plug and resource management related places instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END maps to a variable which is initialized by the KASLR initialization and otherwise it is based on MAX_PHYSMEM_BITS as before. To prevent future hickups add a check into add_pages() to catch callers trying to add memory above PHYSMEM_END. 
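The add_pages() check mentioned above might look roughly like the sketch below; the precise form in arch/x86/mm/init_64.c is not part of this hunk, so treat it as an assumption:

int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
	      struct mhp_params *params)
{
	unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;

	if (WARN_ON_ONCE(end > PHYSMEM_END))
		return -ERANGE;

	/* existing __add_pages() call and max_pfn bookkeeping follow */
	return __add_pages(nid, start_pfn, nr_pages, params);
}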
Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions") Reported-by: Max Ramanouski Reported-by: Alistair Popple Signed-off-by: Thomas Gleixner Tested-By: Max Ramanouski Tested-by: Alistair Popple Reviewed-by: Dan Williams Reviewed-by: Alistair Popple Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..b3864156eaa4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,6 +97,10 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif +#ifndef PHYSMEM_END +# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +#endif + #include #include -- cgit v1.2.3 From 1fa3314c14c6a20d098991a0a6980f9b18b2f930 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 14 Aug 2024 15:52:24 +0300 Subject: ipv4: Centralize TOS matching The TOS field in the IPv4 flow information structure ('flowi4_tos') is matched by the kernel against the TOS selector in IPv4 rules and routes. The field is initialized differently by different call sites. Some treat it as DSCP (RFC 2474) and initialize all six DSCP bits, some treat it as RFC 1349 TOS and initialize it using RT_TOS() and some treat it as RFC 791 TOS and initialize it using IPTOS_RT_MASK. What is common to all these call sites is that they all initialize the lower three DSCP bits, which fits the TOS definition in the initial IPv4 specification (RFC 791). Therefore, the kernel only allows configuring IPv4 FIB rules that match on the lower three DSCP bits which are always guaranteed to be initialized by all call sites: # ip -4 rule add tos 0x1c table 100 # ip -4 rule add tos 0x3c table 100 Error: Invalid tos. While this works, it is unlikely to be very useful. RFC 791 that initially defined the TOS and IP precedence fields was updated by RFC 2474 over twenty five years ago where these fields were replaced by a single six bits DSCP field. Extending FIB rules to match on DSCP can be done by adding a new DSCP selector while maintaining the existing semantics of the TOS selector for applications that rely on that. A prerequisite for allowing FIB rules to match on DSCP is to adjust all the call sites to initialize the high order DSCP bits and remove their masking along the path to the core where the field is matched on. However, making this change alone will result in a behavior change. For example, a forwarded IPv4 packet with a DS field of 0xfc will no longer match a FIB rule that was configured with 'tos 0x1c'. This behavior change can be avoided by masking the upper three DSCP bits in 'flowi4_tos' before comparing it against the TOS selectors in FIB rules and routes. Implement the above by adding a new function that checks whether a given DSCP value matches the one specified in the IPv4 flow information structure and invoke it from the three places that currently match on 'flowi4_tos'. Use RT_TOS() for the masking of 'flowi4_tos' instead of IPTOS_RT_MASK since the latter is not uAPI and we should be able to remove it at some point. Include in since the former defines IPTOS_TOS_MASK which is used in the definition of RT_TOS() in . No regressions in FIB tests: # ./fib_tests.sh [...] Tests passed: 218 Tests failed: 0 And FIB rule tests: # ./fib_rule_tests.sh [...] 
Tests passed: 116 Tests failed: 0 Signed-off-by: Ido Schimmel Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 6 ++++++ include/uapi/linux/in_route.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 72af2f223e59..269ec10f63e4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -434,6 +435,11 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, #endif /* CONFIG_IP_MULTIPLE_TABLES */ +static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4) +{ + return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); +} + /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h index 0cc2c23b47f8..10bdd7e7107f 100644 --- a/include/uapi/linux/in_route.h +++ b/include/uapi/linux/in_route.h @@ -2,6 +2,8 @@ #ifndef _LINUX_IN_ROUTE_H #define _LINUX_IN_ROUTE_H +#include + /* IPv4 routing cache flags */ #define RTCF_DEAD RTNH_F_DEAD -- cgit v1.2.3 From acb0184fe9bca3bb7103dadc76ba9d38c969ca86 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:53 +0100 Subject: coresight: Move struct coresight_trace_id_map to common header The trace ID maps will need to be created and stored by the core and Perf code so move the definition up to the common header. Reviewed-by: Anshuman Khandual Reviewed-by: Mike Leach Signed-off-by: James Clark Tested-by: Leo Yan Tested-by: Ganapatrao Kulkarni Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-12-james.clark@linaro.org --- include/linux/coresight.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index f09ace92176e..c16c61a8411d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -218,6 +218,24 @@ struct coresight_sysfs_link { const char *target_name; }; +/* architecturally we have 128 IDs some of which are reserved */ +#define CORESIGHT_TRACE_IDS_MAX 128 + +/** + * Trace ID map. + * + * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. + * Initialised so that the reserved IDs are permanently marked as + * in use. + * @pend_rel_ids: CPU IDs that have been released by the trace source but not + * yet marked as available, to allow re-allocation to the same + * CPU during a perf session. + */ +struct coresight_trace_id_map { + DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); + DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); +}; + /** * struct coresight_device - representation of a device as used by the framework * @pdata: Platform data with device connections associated to this device. -- cgit v1.2.3 From d53c8253c7822fbc524adcd950e7c6de6a229c99 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:55 +0100 Subject: coresight: Make CPU id map a property of a trace ID map The global CPU ID mappings won't work for per-sink ID maps so move it to the ID map struct. coresight_trace_id_release_all_pending() is hard coded to operate on the default map, but once Perf sessions use their own maps the pending release mechanism will be deleted. So it doesn't need to be extended to accept a trace ID map argument at this point. 
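A rough sketch of what moving the CPU map into the ID map implies for its users; the helper names are illustrative, only the cpu_map member and its atomic_t __percpu type come from the diff:

/* Illustrative only: each coresight_trace_id_map now owns its own
 * per-CPU "current trace ID" slots instead of one global array. */
static int trace_id_map_init_cpu_map(struct coresight_trace_id_map *id_map)
{
	id_map->cpu_map = alloc_percpu(atomic_t);
	return id_map->cpu_map ? 0 : -ENOMEM;
}

static int trace_id_map_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
	return atomic_read(per_cpu_ptr(id_map->cpu_map, cpu));
}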
Signed-off-by: James Clark Reviewed-by: Mike Leach Tested-by: Leo Yan Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-14-james.clark@linaro.org --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c16c61a8411d..7d62b88bfb5c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -234,6 +234,7 @@ struct coresight_sysfs_link { struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); + atomic_t __percpu *cpu_map; }; /** -- cgit v1.2.3 From 5ad628a7617607baac53745f8025638b967470a2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:56 +0100 Subject: coresight: Use per-sink trace ID maps for Perf sessions This will allow sessions with more than CORESIGHT_TRACE_IDS_MAX ETMs as long as there are fewer than that many ETMs connected to each sink. Each sink owns its own trace ID map, and any Perf session connecting to that sink will allocate from it, even if the sink is currently in use by other users. This is similar to the existing behavior where the dynamic trace IDs are constant as long as there is any concurrent Perf session active. It's not completely optimal because slightly more IDs will be used than necessary, but the optimal solution involves tracking the PIDs of each session and allocating ID maps based on the session owner. This is difficult to do with the combination of per-thread and per-cpu modes and some scheduling issues. The complexity of this isn't likely to worth it because even with multiple users they'd just see a difference in the ordering of ID allocations rather than hitting any limits (unless the hardware does have too many ETMs connected to one sink). Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-15-james.clark@linaro.org --- include/linux/coresight.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 7d62b88bfb5c..9c3067e2e38b 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -290,6 +290,7 @@ struct coresight_device { bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; + struct coresight_trace_id_map perf_sink_id_map; /* sysfs links between components */ int nr_links; bool has_conns_grp; @@ -384,7 +385,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode); + enum cs_mode mode, struct coresight_trace_id_map *id_map); void (*disable)(struct coresight_device *csdev, struct perf_event *event); }; -- cgit v1.2.3 From de0029fdde86092c75472c92e56a962f4edee0f6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:57 +0100 Subject: coresight: Remove pending trace ID release mechanism Pending the release of IDs was a way of managing concurrent sysfs and Perf sessions in a single global ID map. Perf may have finished while sysfs hadn't, and Perf shouldn't release the IDs in use by sysfs and vice versa. 
Now that Perf uses its own exclusive ID maps, pending release doesn't result in any different behavior than just releasing all IDs when the last Perf session finishes. As part of the per-sink trace ID change, we would have still had to make the pending mechanism work on a per-sink basis, due to the overlapping ID allocations, so instead of making that more complicated, just remove it. Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-16-james.clark@linaro.org --- include/linux/coresight.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9c3067e2e38b..197949fd2c35 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -227,14 +227,12 @@ struct coresight_sysfs_link { * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. * Initialised so that the reserved IDs are permanently marked as * in use. - * @pend_rel_ids: CPU IDs that have been released by the trace source but not - * yet marked as available, to allow re-allocation to the same - * CPU during a perf session. + * @perf_cs_etm_session_active: Number of Perf sessions using this ID map. */ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); - DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; + atomic_t perf_cs_etm_session_active; }; /** -- cgit v1.2.3 From 487eec8da80aef16229d30429f1b26090b1bf0eb Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:58 +0100 Subject: coresight: Emit sink ID in the HW_ID packets For Perf to be able to decode when per-sink trace IDs are used, emit the sink that's being written to for each ETM. Perf currently errors out if it sees a newer packet version so instead of bumping it, add a new minor version field. This can be used to signify new versions that have backwards compatible fields. Considering this change is only for high core count machines, it doesn't make sense to make a breaking change for everyone. Signed-off-by: James Clark Tested-by: Leo Yan Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-17-james.clark@linaro.org --- include/linux/coresight-pmu.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 51ac441a37c3..89b0ac0014b0 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -49,12 +49,21 @@ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. - * [59:08] - Unused (SBZ) - * [63:60] - Version + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/ + * Added in minor version 1. + * [55:40] - Unused (SBZ) + * [59:56] - Minor Version - previously existing fields are compatible with + * all minor versions. + * [63:60] - Major Version - previously existing fields mean different things + * in new major versions. 
*/ #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8) -#define CS_AUX_HW_ID_CURR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56) +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_MAJOR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION 1 #endif -- cgit v1.2.3 From 988d40a4d4e7d671305bea501562a5d1a1d479fa Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:59 +0100 Subject: coresight: Make trace ID map spinlock local to the map Reduce contention on the lock by replacing the global lock with one for each map. Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240722101202.26915-18-james.clark@linaro.org --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 197949fd2c35..c13342594278 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -233,6 +233,7 @@ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; atomic_t perf_cs_etm_session_active; + spinlock_t lock; }; /** -- cgit v1.2.3 From 7c432a18ad216b4f7b08e93287586d60e12a3b7b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 20 Aug 2024 15:27:55 +0100 Subject: firmware: arm_ffa: Update the FF-A command list with v1.2 additions Arm Firmware Framework for A-profile(FFA) v1.2 introduces register based discovery mechanism and direct messaging extensions that enables to target specific UUID within a partition. Let us add all the newly supported FF-A function IDs in the spec. Also update to the error values and associated handling. Message-Id: <20240820-ffa_v1-2-v2-2-18c0c5f3c65e@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 89683f31ae12..b34f0c0dc2c5 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -73,6 +73,11 @@ #define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) #define FFA_MEM_PERM_SET FFA_SMC_32(0x89) #define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) +#define FFA_CONSOLE_LOG FFA_SMC_32(0x8A) +#define FFA_PARTITION_INFO_GET_REGS FFA_SMC_64(0x8B) +#define FFA_EL3_INTR_HANDLE FFA_SMC_32(0x8C) +#define FFA_MSG_SEND_DIRECT_REQ2 FFA_SMC_64(0x8D) +#define FFA_MSG_SEND_DIRECT_RESP2 FFA_SMC_64(0x8E) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. -- cgit v1.2.3 From aaef3bc98129c86078b336f16788dd733b0728a4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 20 Aug 2024 15:27:58 +0100 Subject: firmware: arm_ffa: Add support for FFA_MSG_SEND_DIRECT_{REQ,RESP}2 FFA_MSG_SEND_DIRECT_{REQ,RESP} supported only x3-x7 to pass implementation defined values as part of the message. This may not be sufficient sometimes and also it would be good to use all the registers supported by SMCCC v1.2 (x0-x17) for such register based communication. Also another limitation with the FFA_MSG_SEND_DIRECT_{REQ,RESP} is the ability to target a specific service within the partition based on it's UUID. 
In order to address both of the above limitation, FF-A v1.2 introduced FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which has the ability to target the message to a specific service based on its UUID within a partition as well as utilise all the available registers(x4-x17 specifically) for the communication. This change adds support for FFA_MSG_SEND_DIRECT_REQ2 and FFA_MSG_SEND_DIRECT_RESP2. Message-Id: <20240820-ffa_v1-2-v2-5-18c0c5f3c65e@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index b34f0c0dc2c5..a28e2a6a13d0 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -270,6 +270,11 @@ struct ffa_indirect_msg_hdr { u32 size; }; +/* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which pass data via registers */ +struct ffa_send_direct_data2 { + unsigned long data[14]; /* x4-x17 */ +}; + struct ffa_mem_region_addr_range { /* The base IPA of the constituent memory region, aligned to 4 kiB */ u64 address; @@ -431,6 +436,8 @@ struct ffa_msg_ops { int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz); + int (*sync_send_receive2)(struct ffa_device *dev, const uuid_t *uuid, + struct ffa_send_direct_data2 *data); }; struct ffa_mem_ops { -- cgit v1.2.3 From e68ac2b488495fa4d127d6105ce633849859957a Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 15 Aug 2024 11:15:40 -0600 Subject: softirq: Remove unused 'action' parameter from action callback When soft interrupt actions are called, they are passed a pointer to the struct softirq action which contains the action's function pointer. This pointer isn't useful, as the action callback already knows what function it is. And since each callback handles a specific soft interrupt, the callback also knows which soft interrupt number is running. No soft interrupt action callback actually uses this parameter, so remove it from the function pointer signature. This clarifies that soft interrupt actions are global routines and makes it slightly cheaper to call them. Signed-off-by: Caleb Sander Mateos Signed-off-by: Thomas Gleixner Reviewed-by: Jens Axboe Link: https://lore.kernel.org/all/20240815171549.3260003-1-csander@purestorage.com --- include/linux/interrupt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3f30c88e0b4c..694de61e0b38 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -594,7 +594,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; struct softirq_action { - void (*action)(struct softirq_action *); + void (*action)(void); }; asmlinkage void do_softirq(void); @@ -609,7 +609,7 @@ static inline void do_softirq_post_smp_call_flush(unsigned int unused) } #endif -extern void open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); -- cgit v1.2.3 From 2fea0c26b82f304f43b3905e56d954cf98a6d0e9 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:19 -0700 Subject: initramfs,lsm: add a security hook to do_populate_rootfs() This patch introduces a new hook to notify security system that the content of initramfs has been unpacked into the rootfs. 
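As a minimal sketch of how an LSM might consume such a hook (the my_lsm_* names are hypothetical; the hook array would be passed to security_add_hooks() during LSM initialization):

#include <linux/lsm_hooks.h>

/* Hypothetical LSM: remember that everything unpacked so far came from
 * the initramfs and can therefore be treated as trusted. */
static bool my_lsm_initramfs_done;

static void my_lsm_initramfs_populated(void)
{
	my_lsm_initramfs_done = true;
}

static struct security_hook_list my_lsm_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(initramfs_populated, my_lsm_initramfs_populated),
};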
Upon receiving this notification, the security system can activate a policy to allow only files that originated from the initramfs to execute or load into kernel during the early stages of booting. This approach is crucial for minimizing the attack surface by ensuring that only trusted files from the initramfs are operational in the critical boot phase. Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 520730fe2d94..22a14fc794fe 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -449,3 +449,5 @@ LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) diff --git a/include/linux/security.h b/include/linux/security.h index 62233fec8ead..3298855abdbc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2256,4 +2256,12 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ +#ifdef CONFIG_SECURITY +extern void security_initramfs_populated(void); +#else +static inline void security_initramfs_populated(void) +{ +} +#endif /* CONFIG_SECURITY */ + #endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.3 From 7138679ff2a2b1674f16618558d6cabea6ab2c53 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:21 -0700 Subject: lsm: add new securityfs delete function When deleting a directory in the security file system, the existing securityfs_remove requires the directory to be empty, otherwise it will do nothing. This leads to a potential risk that the security file system might be in an unclean state when the intended deletion did not happen. This commit introduces a new function securityfs_recursive_remove to recursively delete a directory without leaving an unclean state. Co-developed-by: Christian Brauner (Microsoft) Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 3298855abdbc..f6d2bc69cfa6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2090,6 +2090,7 @@ struct dentry *securityfs_create_symlink(const char *name, const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); +extern void securityfs_recursive_remove(struct dentry *dentry); #else /* CONFIG_SECURITYFS */ -- cgit v1.2.3 From f44554b5067b36c14cc91ed811fa1bd58baed34a Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:23 -0700 Subject: audit,ipe: add IPE auditing support Users of IPE require a way to identify when and why an operation fails, allowing them to both respond to violations of policy and be notified of potentially malicious actions on their systems with respect to IPE itself. This patch introduces 3 new audit events. AUDIT_IPE_ACCESS(1420) indicates the result of an IPE policy evaluation of a resource. AUDIT_IPE_CONFIG_CHANGE(1421) indicates the current active IPE policy has been changed to another loaded policy. AUDIT_IPE_POLICY_LOAD(1422) indicates a new IPE policy has been loaded into the kernel. 
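For illustration, a record in the AUDIT_IPE_ACCESS style shown below could be emitted through the generic audit API roughly as follows (a hedged sketch, not the actual IPE implementation; the helper name and field list are simplified):

#include <linux/audit.h>
#include <linux/gfp.h>
#include <linux/sched.h>

/* Sketch only: emit an IPE-style access record via the audit subsystem. */
static void example_audit_ipe_access(const char *op, const char *hook,
				     bool enforcing, const char *rule)
{
	struct audit_buffer *ab;

	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_IPE_ACCESS);
	if (!ab)
		return;

	audit_log_format(ab, "ipe_op=%s ipe_hook=%s enforcing=%d pid=%d",
			 op, hook, enforcing ? 1 : 0, task_pid_nr(current));
	audit_log_format(ab, " rule=\"%s\"", rule);
	audit_log_end(ab);
}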
This patch also adds support for success auditing, allowing users to identify why an allow decision was made for a resource. However, it is recommended to use this option with caution, as it is quite noisy. Here are some examples of the new audit record types: AUDIT_IPE_ACCESS(1420): audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=297 comm="sh" path="/root/vol/bin/hello" dev="tmpfs" ino=3897 rule="op=EXECUTE boot_verified=TRUE action=ALLOW" audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=299 comm="sh" path="/mnt/ipe/bin/hello" dev="dm-0" ino=2 rule="DEFAULT action=DENY" audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=300 path="/tmp/tmpdp2h1lub/deny/bin/hello" dev="tmpfs" ino=131 rule="DEFAULT action=DENY" The above three records were generated when the active IPE policy only allows binaries from the initramfs to run. Three identical `hello` binaries were placed at different locations; only the first hello, from the rootfs (initramfs), was allowed. Field ipe_op is followed by the IPE operation name associated with the log. Field ipe_hook is followed by the name of the LSM hook that triggered the IPE event. Field enforcing is followed by the enforcement state of IPE (it will be introduced in the next commit). Field pid is followed by the pid of the process that triggered the IPE event. Field comm is followed by the command line program name of the process that triggered the IPE event. Field path is followed by the file's path name. Field dev is followed by the device name as found in /dev where the file is from. Note that for device mappers it will use the name `dm-X` instead of the name in /dev/mapper. For a file in a temp file system, which is not from a device, it will use `tmpfs` for the field. The implementation of this part follows another existing use case, LSM_AUDIT_DATA_INODE, in security/lsm_audit.c. Field ino is followed by the file's inode number. Field rule is followed by the IPE rule that made the access decision. The whole rule must be audited because the decision is based on the combination of all property conditions in the rule. Along with the syscall audit event, users can see why a block happened. For example: audit: AUDIT1420 ipe_op=EXECUTE ipe_hook=BPRM_CHECK enforcing=1 pid=2138 comm="bash" path="/mnt/ipe/bin/hello" dev="dm-0" ino=2 rule="DEFAULT action=DENY" audit[1956]: SYSCALL arch=c000003e syscall=59 success=no exit=-13 a0=556790138df0 a1=556790135390 a2=5567901338b0 a3=ab2a41a67f4f1f4e items=1 ppid=147 pid=1956 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null) The above two records show that bash used execve to run "hello" and was blocked by IPE. Note that the IPE records always appear prior to the corresponding SYSCALL record. AUDIT_IPE_CONFIG_CHANGE(1421): audit: AUDIT1421 old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F auid=4294967295 ses=4294967295 lsm=ipe res=1 The above record shows the active IPE policy switching from `Allow_All` to `boot_verified`, along with the version and the hash digest of the two policies. Note that IPE can only have one policy active at a time; all access decision evaluation is based on the current active policy.
The normal procedure to deploy a policy is loading the policy to deploy into the kernel first, then switch the active policy to it. AUDIT_IPE_POLICY_LOAD(1422): audit: AUDIT1422 policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F2676 auid=4294967295 ses=4294967295 lsm=ipe res=1 The above record showed a new policy has been loaded into the kernel with the policy name, policy version and policy hash. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: subject line tweak] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d676ed2b246e..75e21a135483 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -143,6 +143,9 @@ #define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */ #define AUDIT_MAC_CALIPSO_ADD 1418 /* NetLabel: add CALIPSO DOI entry */ #define AUDIT_MAC_CALIPSO_DEL 1419 /* NetLabel: del CALIPSO DOI entry */ +#define AUDIT_IPE_ACCESS 1420 /* IPE denial or grant */ +#define AUDIT_IPE_CONFIG_CHANGE 1421 /* IPE config change */ +#define AUDIT_IPE_POLICY_LOAD 1422 /* IPE policy load */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 -- cgit v1.2.3 From b55d26bd1891423a3cdccf816b386aec8bbefc87 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:25 -0700 Subject: block,lsm: add LSM blob and new LSM hooks for block devices This patch introduces a new LSM blob to the block_device structure, enabling the security subsystem to store security-sensitive data related to block devices. Currently, for a device mapper's mapped device containing a dm-verity target, critical security information such as the roothash and its signing state are not readily accessible. Specifically, while the dm-verity volume creation process passes the dm-verity roothash and its signature from userspace to the kernel, the roothash is stored privately within the dm-verity target, and its signature is discarded post-verification. This makes it extremely hard for the security subsystem to utilize these data. With the addition of the LSM blob to the block_device structure, the security subsystem can now retain and manage important security metadata such as the roothash and the signing state of a dm-verity by storing them inside the blob. Access decisions can then be based on these stored data. The implementation follows the same approach used for security blobs in other structures like struct file, struct inode, and struct superblock. The initialization of the security blob occurs after the creation of the struct block_device, performed by the security subsystem. Similarly, the security blob is freed by the security subsystem before the struct block_device is deallocated or freed. This patch also introduces a new hook security_bdev_setintegrity() to save block device's integrity data to the new LSM blob. For example, for dm-verity, it can use this hook to expose its roothash and signing state to LSMs, then LSMs can save these data into the LSM blob. Please note that the new hook should be invoked every time the security information is updated to keep these data current. For example, in dm-verity, if the mapping table is reloaded and configured to use a different dm-verity target with a new roothash and signing information, the previously stored data in the LSM blob will become obsolete. 
It is crucial to re-invoke the hook to refresh these data and ensure they are up to date. This necessity arises from the design of device-mapper, where a device-mapper device is first created, and then targets are subsequently loaded into it. These targets can be modified multiple times during the device's lifetime. Therefore, while the LSM blob is allocated during the creation of the block device, its actual contents are not initialized at this stage and can change substantially over time. This includes alterations from data that the LSM 'trusts' to those it does not, making it essential to handle these changes correctly. Failure to address this dynamic aspect could potentially allow for bypassing LSM checks. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu [PM: merge fuzz, subject line tweaks] Signed-off-by: Paul Moore --- include/linux/blk_types.h | 3 +++ include/linux/lsm_hook_defs.h | 5 +++++ include/linux/lsm_hooks.h | 1 + include/linux/security.h | 26 ++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 36ed96133217..413ebdff974b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -71,6 +71,9 @@ struct block_device { struct partition_meta_info *bd_meta_info; int bd_writers; +#ifdef CONFIG_SECURITY + void *bd_security; +#endif /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 22a14fc794fe..860821f3bf6f 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -451,3 +451,8 @@ LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) + +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, + enum lsm_integrity_type type, const void *value, size_t size) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 11ea0063228f..4687985b9175 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -83,6 +83,7 @@ struct lsm_blob_sizes { int lbs_task; int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; + int lbs_bdev; }; /* diff --git a/include/linux/security.h b/include/linux/security.h index f6d2bc69cfa6..d7cab2d5002f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -83,6 +83,10 @@ enum lsm_event { LSM_POLICY_CHANGE, }; +enum lsm_integrity_type { + __LSM_INT_MAX +}; + /* * These are reasons that can be passed to the security_locked_down() * LSM hook. 
Lockdown reasons that protect kernel integrity (ie, the @@ -509,6 +513,11 @@ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); +int security_bdev_alloc(struct block_device *bdev); +void security_bdev_free(struct block_device *bdev); +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1483,6 +1492,23 @@ static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, { return -EOPNOTSUPP; } + +static inline int security_bdev_alloc(struct block_device *bdev) +{ + return 0; +} + +static inline void security_bdev_free(struct block_device *bdev) +{ +} + +static inline int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) -- cgit v1.2.3 From a6af7bc3d72ff52c5526a392144347fcb3094149 Mon Sep 17 00:00:00 2001 From: Deven Bowers Date: Fri, 2 Aug 2024 23:08:26 -0700 Subject: dm-verity: expose root hash digest and signature data to LSMs dm-verity provides a strong guarantee of a block device's integrity. As a generic way to check the integrity of a block device, it provides those integrity guarantees to its higher layers, including the filesystem level. However, critical security metadata like the dm-verity roothash and its signing information are not easily accessible to the LSMs. To address this limitation, this patch introduces a mechanism to store and manage these essential security details within a newly added LSM blob in the block_device structure. This addition allows LSMs to make access control decisions on the integrity data stored within the block_device, enabling more flexible security policies. For instance, LSMs can now revoke access to dm-verity devices based on their roothashes, ensuring that only authorized and verified content is accessible. Additionally, LSMs can enforce policies to only allow files from dm-verity devices that have a valid digital signature to execute, effectively blocking any unsigned files from execution, thus enhancing security against unauthorized modifications. The patch includes new hook calls, `security_bdev_setintegrity()`, in dm-verity to expose the dm-verity roothash and the roothash signature to LSMs via preresume() callback. By using the preresume() callback, it ensures that the security metadata is consistently in sync with the metadata of the dm-verity target in the current active mapping table. The hook calls are depended on CONFIG_SECURITY. 
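For illustration, an LSM consuming this data might implement its bdev_setintegrity hook roughly as follows, using the struct dm_verity_digest and LSM_INT_DMVERITY_ROOTHASH definitions added below (a sketch for a hypothetical my_lsm with a per-bdev blob reserved via lbs_bdev; only the root hash case is shown):

#include <linux/blk_types.h>
#include <linux/lsm_hooks.h>
#include <linux/security.h>
#include <linux/string.h>

/* Hypothetical per-bdev blob, reserved by setting .lbs_bdev below. */
struct my_lsm_bdev {
	u8 roothash[64];
	size_t roothash_len;
};

static struct lsm_blob_sizes my_lsm_blob_sizes __ro_after_init = {
	.lbs_bdev = sizeof(struct my_lsm_bdev),
};

static inline struct my_lsm_bdev *my_lsm_bdev_blob(const struct block_device *bdev)
{
	/* lbs_bdev is updated to this LSM's offset at registration time */
	return bdev->bd_security + my_lsm_blob_sizes.lbs_bdev;
}

static int my_lsm_bdev_setintegrity(struct block_device *bdev,
				    enum lsm_integrity_type type,
				    const void *value, size_t size)
{
	struct my_lsm_bdev *blob = my_lsm_bdev_blob(bdev);
	const struct dm_verity_digest *d;

	if (type != LSM_INT_DMVERITY_ROOTHASH)
		return 0;	/* not a type this LSM cares about */

	d = value;
	if (d->digest_len > sizeof(blob->roothash))
		return -EINVAL;

	memcpy(blob->roothash, d->digest, d->digest_len);
	blob->roothash_len = d->digest_len;
	return 0;
}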
Signed-off-by: Deven Bowers Signed-off-by: Fan Wu Reviewed-by: Mikulas Patocka [PM: moved sig_size field as discussed] Signed-off-by: Paul Moore --- include/linux/security.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index d7cab2d5002f..e383022467db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -83,8 +83,15 @@ enum lsm_event { LSM_POLICY_CHANGE, }; +struct dm_verity_digest { + const char *alg; + const u8 *digest; + size_t digest_len; +}; + enum lsm_integrity_type { - __LSM_INT_MAX + LSM_INT_DMVERITY_SIG_VALID, + LSM_INT_DMVERITY_ROOTHASH, }; /* -- cgit v1.2.3 From fb55e177d5936fb80fb2586036d195c57e7f6892 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:28 -0700 Subject: lsm: add security_inode_setintegrity() hook This patch introduces a new hook to save inode's integrity data. For example, for fsverity enabled files, LSMs can use this hook to save the existence of verified fsverity builtin signature into the inode's security blob, and LSMs can make access decisions based on this data. Signed-off-by: Fan Wu [PM: subject line tweak, removed changelog] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 860821f3bf6f..1d59513bf230 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -180,6 +180,8 @@ LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, + enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) diff --git a/include/linux/security.h b/include/linux/security.h index e383022467db..97b7c57e6560 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -410,6 +410,9 @@ int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); @@ -1026,6 +1029,13 @@ static inline int security_inode_copy_up(struct dentry *src, struct cred **new) return 0; } +static inline int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { -- cgit v1.2.3 From 7c373e4f1445263728d3eeab7e33e932c8f4a288 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Fri, 2 Aug 2024 23:08:29 -0700 Subject: fsverity: expose verified fsverity built-in signatures to LSMs This patch enhances fsverity's capabilities to support both integrity and authenticity protection by introducing the 
exposure of built-in signatures through a new LSM hook. This functionality allows LSMs, e.g. IPE, to enforce policies based on the authenticity and integrity of files, specifically focusing on built-in fsverity signatures. It enables a policy enforcement layer within LSMs for fsverity, offering granular control over the usage of authenticity claims. For instance, a policy could be established to only permit the execution of all files with verified built-in fsverity signatures. The introduction of a security_inode_setintegrity() hook call within fsverity's workflow ensures that the verified built-in signature of a file is exposed to LSMs. This enables LSMs to recognize and label fsverity files that contain a verified built-in fsverity signature. This hook is invoked subsequent to the fsverity_verify_signature() process, guaranteeing the signature's verification against fsverity's keyring. This mechanism is crucial for maintaining system security, as it operates in kernel space, effectively thwarting attempts by malicious binaries to bypass user space stack interactions. The second to last commit in this patch set will add a link to the IPE documentation in fsverity.rst. Signed-off-by: Deven Bowers Signed-off-by: Fan Wu Acked-by: Eric Biggers Signed-off-by: Paul Moore --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 97b7c57e6560..c37c32ebbdcd 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -92,6 +92,7 @@ struct dm_verity_digest { enum lsm_integrity_type { LSM_INT_DMVERITY_SIG_VALID, LSM_INT_DMVERITY_ROOTHASH, + LSM_INT_FSVERITY_BUILTINSIG_VALID, }; /* -- cgit v1.2.3 From 7cff549daa67c7549c5a8688674cea2df8c2ec80 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:05 +0200 Subject: kernel: Add helper macros for loop unrolling This helps in easily initializing blocks of code (e.g. static calls and keys). UNROLL(N, MACRO, __VA_ARGS__) calls MACRO N times with the first argument as the index of the iteration. This allows string pasting to create unique tokens for variable names, function calls etc. As an example: #include #define MACRO(N, a, b) \ int add_##N(int a, int b) \ { \ return a + b + N; \ } UNROLL(2, MACRO, x, y) expands to: int add_0(int x, int y) { return x + y + 0; } int add_1(int x, int y) { return x + y + 1; } Tested-by: Guenter Roeck Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Andrii Nakryiko Acked-by: Casey Schaufler Nacked-by: Tetsuo Handa Signed-off-by: KP Singh Signed-off-by: Paul Moore --- include/linux/unroll.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/unroll.h (limited to 'include') diff --git a/include/linux/unroll.h b/include/linux/unroll.h new file mode 100644 index 000000000000..d42fd6366373 --- /dev/null +++ b/include/linux/unroll.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __UNROLL_H +#define __UNROLL_H + +#include + +#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) + +#define __UNROLL_0(MACRO, args...) +#define __UNROLL_1(MACRO, args...) __UNROLL_0(MACRO, args) MACRO(0, args) +#define __UNROLL_2(MACRO, args...) __UNROLL_1(MACRO, args) MACRO(1, args) +#define __UNROLL_3(MACRO, args...) __UNROLL_2(MACRO, args) MACRO(2, args) +#define __UNROLL_4(MACRO, args...) 
__UNROLL_3(MACRO, args) MACRO(3, args) +#define __UNROLL_5(MACRO, args...) __UNROLL_4(MACRO, args) MACRO(4, args) +#define __UNROLL_6(MACRO, args...) __UNROLL_5(MACRO, args) MACRO(5, args) +#define __UNROLL_7(MACRO, args...) __UNROLL_6(MACRO, args) MACRO(6, args) +#define __UNROLL_8(MACRO, args...) __UNROLL_7(MACRO, args) MACRO(7, args) +#define __UNROLL_9(MACRO, args...) __UNROLL_8(MACRO, args) MACRO(8, args) +#define __UNROLL_10(MACRO, args...) __UNROLL_9(MACRO, args) MACRO(9, args) +#define __UNROLL_11(MACRO, args...) __UNROLL_10(MACRO, args) MACRO(10, args) +#define __UNROLL_12(MACRO, args...) __UNROLL_11(MACRO, args) MACRO(11, args) +#define __UNROLL_13(MACRO, args...) __UNROLL_12(MACRO, args) MACRO(12, args) +#define __UNROLL_14(MACRO, args...) __UNROLL_13(MACRO, args) MACRO(13, args) +#define __UNROLL_15(MACRO, args...) __UNROLL_14(MACRO, args) MACRO(14, args) +#define __UNROLL_16(MACRO, args...) __UNROLL_15(MACRO, args) MACRO(15, args) +#define __UNROLL_17(MACRO, args...) __UNROLL_16(MACRO, args) MACRO(16, args) +#define __UNROLL_18(MACRO, args...) __UNROLL_17(MACRO, args) MACRO(17, args) +#define __UNROLL_19(MACRO, args...) __UNROLL_18(MACRO, args) MACRO(18, args) +#define __UNROLL_20(MACRO, args...) __UNROLL_19(MACRO, args) MACRO(19, args) + +#endif /* __UNROLL_H */ -- cgit v1.2.3 From 9b59a85a84dc37ca4f2c54df5e06aff4c1eae5d3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 12:38:08 -0700 Subject: workqueue: Don't call va_start / va_end twice Calling va_start / va_end multiple times is undefined and causes problems with certain compiler / platforms. Change alloc_ordered_workqueue_lockdep_map to a macro and updated __alloc_workqueue to take a va_list argument. Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 04dff07a9e2b..f3cc15940b4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -543,20 +543,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -__printf(1, 4) static inline struct workqueue_struct * -alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, - struct lockdep_map *lockdep_map, ...) -{ - struct workqueue_struct *wq; - va_list args; - - va_start(args, lockdep_map); - wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, - 1, lockdep_map, args); - va_end(args); - - return wq; -} +#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args) #endif /** -- cgit v1.2.3 From 051c86afc342aed1f84d66ff5d09dc9e1c1685a1 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Sun, 18 Aug 2024 11:38:15 +0530 Subject: drm/mipi-dsi: Add mipi_dsi_dcs_set_tear_scanline_multi mipi_dsi_dcs_set_tear_scanline_multi can heavily benefit from being converted to a multi style function as it is often called in the context of similar functions. 
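For illustration, the multi-context pattern being converted to looks roughly like the panel-enable sketch below (assuming the other *_multi() helpers from the same conversion series are available; errors accumulate in the context instead of being checked per call):

#include <drm/drm_mipi_dsi.h>

/* Sketch of a panel enable path using accumulated-error helpers. */
static int example_panel_enable(struct mipi_dsi_device *dsi)
{
	struct mipi_dsi_multi_context ctx = { .dsi = dsi };

	mipi_dsi_dcs_exit_sleep_mode_multi(&ctx);
	mipi_dsi_dcs_set_tear_on_multi(&ctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK);
	mipi_dsi_dcs_set_tear_scanline_multi(&ctx, 0);
	mipi_dsi_dcs_set_display_on_multi(&ctx);

	/* Each helper becomes a no-op once an error has been recorded. */
	return ctx.accum_err;
}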
Reviewed-by: Douglas Anderson Signed-off-by: Tejas Vipin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240818060816.848784-2-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b78aae45cae7..f725f8654611 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -375,6 +375,8 @@ void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); +void mipi_dsi_dcs_set_tear_scanline_multi(struct mipi_dsi_multi_context *ctx, + u16 scanline); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.2.3 From 13cfd6a6d7ac78e845768278ab745acbd19c43ce Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 18 Aug 2024 14:43:51 +0300 Subject: net: Silence false field-spanning write warning in metadata_dst memcpy When metadata_dst struct is allocated (using metadata_dst_alloc()), it reserves room for options at the end of the struct. Change the memcpy() to unsafe_memcpy() as it is guaranteed that enough room (md_size bytes) was allocated and the field-spanning write is intentional. This resolves the following warning: ------------[ cut here ]------------ memcpy: detected field-spanning write (size 104) of single field "&new_md->u.tun_info" at include/net/dst_metadata.h:166 (size 96) WARNING: CPU: 2 PID: 391470 at include/net/dst_metadata.h:166 tun_dst_unclone+0x114/0x138 [geneve] Modules linked in: act_tunnel_key geneve ip6_udp_tunnel udp_tunnel act_vlan act_mirred act_skbedit cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress sbsa_gwdt ipmi_devintf ipmi_msghandler xfrm_interface xfrm6_tunnel tunnel6 tunnel4 xfrm_user xfrm_algo nvme_fabrics overlay optee openvswitch nsh nf_conncount ib_srp scsi_transport_srp rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm libiscsi ib_ipoib scsi_transport_iscsi ib_cm uio_pdrv_genirq uio mlxbf_pmc pwr_mlxbf mlxbf_bootctl bluefield_edac nft_chain_nat binfmt_misc xt_MASQUERADE nf_nat xt_tcpmss xt_NFLOG nfnetlink_log xt_recent xt_hashlimit xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_mark xt_comment ipt_REJECT nf_reject_ipv4 nft_compat nf_tables nfnetlink sch_fq_codel dm_multipath fuse efi_pstore ip_tables btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor xor_neon raid6_pq raid1 raid0 nvme nvme_core mlx5_ib ib_uverbs ib_core ipv6 crc_ccitt mlx5_core crct10dif_ce mlxfw psample i2c_mlxbf gpio_mlxbf2 mlxbf_gige mlxbf_tmfifo CPU: 2 PID: 391470 Comm: handler6 Not tainted 6.10.0-rc1 #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.5.0.12993 Dec 6 2023 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : tun_dst_unclone+0x114/0x138 [geneve] lr : tun_dst_unclone+0x114/0x138 [geneve] sp : ffffffc0804533f0 x29: ffffffc0804533f0 x28: 000000000000024e x27: 0000000000000000 x26: ffffffdcfc0e8e40 x25: ffffff8086fa6600 x24: ffffff8096a0c000 x23: 0000000000000068 x22: 0000000000000008 x21: ffffff8092ad7000 x20: ffffff8081e17900 x19: ffffff8092ad7900 x18: 00000000fffffffd x17: 0000000000000000 x16: ffffffdcfa018488 x15: 695f6e75742e753e x14: 2d646d5f77656e26 x13: 6d5f77656e262220 x12: 646c65696620656c x11: ffffffdcfbe33ae8 x10: ffffffdcfbe1baa8 x9 : ffffffdcfa0a4c10 x8 : 0000000000017fe8 x7 : 
c0000000ffffefff x6 : 0000000000000001 x5 : ffffff83fdeeb010 x4 : 0000000000000000 x3 : 0000000000000027 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80913f6780 Call trace: tun_dst_unclone+0x114/0x138 [geneve] geneve_xmit+0x214/0x10e0 [geneve] dev_hard_start_xmit+0xc0/0x220 __dev_queue_xmit+0xa14/0xd38 dev_queue_xmit+0x14/0x28 [openvswitch] ovs_vport_send+0x98/0x1c8 [openvswitch] do_output+0x80/0x1a0 [openvswitch] do_execute_actions+0x172c/0x1958 [openvswitch] ovs_execute_actions+0x64/0x1a8 [openvswitch] ovs_packet_cmd_execute+0x258/0x2d8 [openvswitch] genl_family_rcv_msg_doit+0xc8/0x138 genl_rcv_msg+0x1ec/0x280 netlink_rcv_skb+0x64/0x150 genl_rcv+0x40/0x60 netlink_unicast+0x2e4/0x348 netlink_sendmsg+0x1b0/0x400 __sock_sendmsg+0x64/0xc0 ____sys_sendmsg+0x284/0x308 ___sys_sendmsg+0x88/0xf0 __sys_sendmsg+0x70/0xd8 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x38/0x100 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x1a4/0x1a8 ---[ end trace 0000000000000000 ]--- Reviewed-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20240818114351.3612692-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/dst_metadata.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 4160731dcb6e..84c15402931c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -163,8 +163,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) if (!new_md) return ERR_PTR(-ENOMEM); - memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, - sizeof(struct ip_tunnel_info) + md_size); + unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size, + /* metadata_dst_alloc() reserves room (md_size bytes) for + * options right after the ip_tunnel_info struct. 
+ */); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { -- cgit v1.2.3 From 8fb9f31984bdc0aaa1b0f7c7e7a27bd3137f8744 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 1 Aug 2024 09:33:51 +0800 Subject: f2fs: clean up val{>>,<<}F2FS_BLKSIZE_BITS Use F2FS_BYTES_TO_BLK(bytes) and F2FS_BLK_TO_BYTES(blk) for cleanup Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 01bee2b289c2..f17f89a259e2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,7 +19,6 @@ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -28,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -- cgit v1.2.3 From 013f510d1fce563f37f420bf231b065885a16f21 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 1 Aug 2024 21:31:21 +0800 Subject: dm: Remove unused declaration dm_get_rq_mapinfo() Commit ae6ad75e5c3c ("dm: remove unused dm_get_rq_mapinfo()") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 53ca3a913d06..8321f65897f3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); -union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED struct dm_report_zones_args { -- cgit v1.2.3 From 0e1d5731d3c1e2214249ef36dcd13ad51ad304cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 08:35:30 +0206 Subject: printk: Check printk_deferred_enter()/_exit() usage Add validation that printk_deferred_enter()/_exit() are called in non-migration contexts. 
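For illustration, the usage pattern that this validation covers is roughly the following (a sketch; the raw spinlock stands in for whatever state makes direct console printing unsafe in the caller):

#include <linux/printk.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

/* Sketch: defer console output while holding a lock that the console
 * path could itself try to take. Interrupts stay disabled for the whole
 * deferred section, so the context cannot migrate. */
static void example_deferred_report(int err)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	printk_deferred_enter();
	pr_warn("example: deferred report, err=%d\n", err);
	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&example_lock, flags);
}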
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-5-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index b937cefcb31c..eee8e97da681 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -161,15 +161,16 @@ int _printk(const char *fmt, ...); */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); +extern void __printk_deferred_enter(void); +extern void __printk_deferred_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ -#define printk_deferred_enter __printk_safe_enter -#define printk_deferred_exit __printk_safe_exit +#define printk_deferred_enter() __printk_deferred_enter() +#define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state -- cgit v1.2.3 From b7049d88c1763d5f133b0e93e39da8e89a9f944b Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:32 +0206 Subject: printk: nbcon: Remove return value for write_atomic() The return value of write_atomic() does not provide any useful information. On the contrary, it makes things more complicated for the caller to appropriately deal with the information. Change write_atomic() to not have a return value. If the message did not get printed due to loss of ownership, the caller will notice this on its own. If ownership was not lost, it will be assumed that the driver successfully printed the message and the sequence number for that console will be incremented. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 31a8f5b85f5d..577b157fe4fb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -345,7 +345,7 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - bool (*write_atomic)(struct console *con, + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; -- cgit v1.2.3 From 7d56936c1e5208b5eeea2a9a2f70e85248f6da49 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:33 +0206 Subject: printk: nbcon: Add detailed doc for write_atomic() The write_atomic() callback has special requirements and is allowed to use special helper functions. Provide detailed documentation of the callback so that a developer has a chance of implementing it correctly. 
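For illustration, a skeleton that follows the documented rules might look like this (a sketch for a hypothetical UART; it assumes the outbuf/len fields of struct nbcon_write_context and does not model real hardware):

#include <linux/console.h>

static void my_uart_write_atomic(struct console *con,
				 struct nbcon_write_context *wctxt)
{
	unsigned int i;

	/* The register accesses below are not safe against a takeover. */
	if (!nbcon_enter_unsafe(wctxt))
		return;	/* ownership lost: back out immediately */

	for (i = 0; i < wctxt->len; i++) {
		/* poll for TX ready, then write wctxt->outbuf[i] to the
		 * data register of the hypothetical UART */
	}

	nbcon_exit_unsafe(wctxt);
}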
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-8-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 577b157fe4fb..35c64ee3827b 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -303,7 +303,7 @@ struct nbcon_write_context { /** * struct console - The console descriptor structure * @name: The name of the console driver - * @write: Write callback to output messages (Optional) + * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) @@ -320,7 +320,6 @@ struct nbcon_write_context { * @data: Driver private data * @node: hlist node for the console list * - * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -345,8 +344,34 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - void (*write_atomic)(struct console *con, - struct nbcon_write_context *wctxt); + + /** + * @write_atomic: + * + * NBCON callback to write out text in any context. (Optional) + * + * This callback is called with the console already acquired. However, + * a higher priority context is allowed to take it over by default. + * + * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() + * around any code where the takeover is not safe, for example, when + * manipulating the serial port registers. + * + * nbcon_enter_unsafe() will fail if the context has lost the console + * ownership in the meantime. In this case, the callback is no longer + * allowed to go forward. It must back out immediately and carefully. + * The buffer content is also no longer trusted since it no longer + * belongs to the context. + * + * The callback should allow the takeover whenever it is safe. It + * increases the chance to see messages when the system is in trouble. + * + * The callback can be called from any context (including NMI). + * Therefore it must avoid usage of any locking and instead rely + * on the console ownership for synchronization. + */ + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 7a16a771890746b4060333d665ebe674945a3cfa Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:34 +0206 Subject: printk: nbcon: Add callbacks to synchronize with driver Console drivers typically must deal with access to the hardware via user input/output (such as an interactive login shell) and output of kernel messages via printk() calls. To provide the necessary synchronization, usually some driver-specific locking mechanism is used (for example, the port spinlock for uart serial consoles). Until now, usage of this driver-specific locking has been hidden from the printk-subsystem and implemented within the various console callbacks. However, nbcon consoles would need to use it even in the generic code. Add device_lock() and device_unlock() callback which will need to get implemented by nbcon consoles. 
The callbacks will use whatever synchronization mechanism the driver is using for itself. The minimum requirement is to prevent CPU migration. It would allow a context friendly acquiring of nbcon console ownership in non-emergency and non-panic context. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-9-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 35c64ee3827b..46b3c210b931 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -372,6 +372,49 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @device_lock: + * + * NBCON callback to begin synchronization with driver code. + * + * Console drivers typically must deal with access to the hardware + * via user input/output (such as an interactive login shell) and + * output of kernel messages via printk() calls. This callback is + * called by the printk-subsystem whenever it needs to synchronize + * with hardware access by the driver. It should be implemented to + * use whatever synchronization mechanism the driver is using for + * itself (for example, the port lock for uart serial consoles). + * + * The callback is always called from task context. It may use any + * synchronization method required by the driver. + * + * IMPORTANT: The callback MUST disable migration. The console driver + * may be using a synchronization mechanism that already takes + * care of this (such as spinlocks). Otherwise this function must + * explicitly call migrate_disable(). + * + * The flags argument is provided as a convenience to the driver. It + * will be passed again to device_unlock(). It can be ignored if the + * driver does not need it. + */ + void (*device_lock)(struct console *con, unsigned long *flags); + + /** + * @device_unlock: + * + * NBCON callback to finish synchronization with driver code. + * + * It is the counterpart to device_lock(). + * + * This callback is always called from task context. It must + * appropriately re-enable migration (depending on how device_lock() + * disabled migration). + * + * The flags argument is the value of the same variable that was + * passed to device_lock(). + */ + void (*device_unlock)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; -- cgit v1.2.3 From 77e73c0687f5bc884fa1b0cb97aca912bcc01957 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:36 +0206 Subject: serial: core: Provide low-level functions to lock port It will be necessary at times for the uart nbcon console drivers to acquire the port lock directly (without the additional nbcon functionality of the port lock wrappers). These are special cases such as the implementation of the device_lock()/device_unlock() callbacks or for internal port lock wrapper synchronization. Provide low-level variants __uart_port_lock_irqsave() and __uart_port_unlock_irqrestore() for this purpose. 
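For illustration, a uart-based nbcon console could implement the device_lock()/device_unlock() callbacks from the previous patch on top of these low-level helpers roughly as follows (a sketch; my_ports and the index-based lookup are hypothetical driver details):

#include <linux/console.h>
#include <linux/serial_core.h>

static struct uart_port my_ports[1];

static void my_uart_console_device_lock(struct console *con, unsigned long *flags)
{
	/* The port lock disables migration, as the callback contract requires. */
	__uart_port_lock_irqsave(&my_ports[con->index], flags);
}

static void my_uart_console_device_unlock(struct console *con, unsigned long flags)
{
	__uart_port_unlock_irqrestore(&my_ports[con->index], flags);
}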
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240820063001.36405-11-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aea25eef9a1a..8872cd21e70a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -590,6 +590,24 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From eabd4600dafacf9895184259373f2445ff748654 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:37 +0206 Subject: serial: core: Introduce wrapper to set @uart_port->cons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce uart_port_set_cons() as a wrapper to set @cons of a uart_port. The wrapper sets @cons under the port lock in order to prevent @cons from disappearing while another context is holding the port lock. This is necessary for a follow-up commit relating to the port lock wrappers, which rely on @cons not changing between lock and unlock. Signed-off-by: John Ogness Tested-by: Théo Lebrun # EyeQ5, AMBA-PL011 Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240820063001.36405-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8872cd21e70a..2cf03ff2056a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -608,6 +608,23 @@ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned spin_unlock_irqrestore(&up->lock, flags); } +/** + * uart_port_set_cons - Safely set the @cons field for a uart + * @up: The uart port to set + * @con: The new console to set to + * + * This function must be used to set @up->cons. It uses the port lock to + * synchronize with the port lock wrappers in order to ensure that the console + * cannot change or disappear while another context is holding the port lock. + */ +static inline void uart_port_set_cons(struct uart_port *up, struct console *con) +{ + unsigned long flags; + + __uart_port_lock_irqsave(up, &flags); + up->cons = con; + __uart_port_unlock_irqrestore(up, flags); +} /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure -- cgit v1.2.3 From dc219d8d858d95549543aa7a41b6e64a0da9e406 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:38 +0206 Subject: console: Improve console_srcu_read_flags() comments It was not clear when exactly console_srcu_read_flags() must be used vs. directly reading @console->flags. 
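For context, the pattern where the helper is required looks roughly like this illustrative sketch of walking the registered console list under SRCU (assuming the for_each_console_srcu() iterator):

#include <linux/console.h>

static void example_walk_consoles(void)
{
	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		/* @con may be registered, so read its flags via the
		 * SRCU helper rather than directly. */
		short flags = console_srcu_read_flags(con);

		if (flags & CON_SUSPENDED)
			continue;
		/* ... use @con as a registered console ... */
	}
	console_srcu_read_unlock(cookie);
}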
Refactor and clarify that console_srcu_read_flags() is only needed if the console is registered or the caller is in a context where the registration status of the console may change (due to another context). The function requires the caller holds @console_srcu, which will ensure that the caller sees an appropriate @flags value for the registered console and that exit/cleanup routines will not run if the console is in the process of unregistration. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-13-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 46b3c210b931..aafe3121b74e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -446,28 +446,34 @@ extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** - * console_srcu_read_flags - Locklessly read the console flags + * console_srcu_read_flags - Locklessly read flags of a possibly registered + * console * @con: struct console pointer of console to read flags from * - * This function provides the necessary READ_ONCE() and data_race() - * notation for locklessly reading the console flags. The READ_ONCE() - * in this function matches the WRITE_ONCE() when @flags are modified - * for registered consoles with console_srcu_write_flags(). + * Locklessly reading @con->flags provides a consistent read value because + * there is at most one CPU modifying @con->flags and that CPU is using only + * read-modify-write operations to do so. * - * Only use this function to read console flags when locklessly - * iterating the console list via srcu. + * Requires console_srcu_read_lock to be held, which implies that @con might + * be a registered console. The purpose of holding console_srcu_read_lock is + * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) + * and that no exit/cleanup routines will run if the console is currently + * undergoing unregistration. + * + * If the caller is holding the console_list_lock or it is _certain_ that + * @con is not and will not become registered, the caller may read + * @con->flags directly instead. * * Context: Any context. + * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* - * Locklessly reading console->flags provides a consistent - * read value because there is at most one CPU modifying - * console->flags and that CPU is using only read-modify-write - * operations to do so. + * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } -- cgit v1.2.3 From adf6f37d142e14896115929f3892bbc0a86e25bf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:39 +0206 Subject: nbcon: Add API to acquire context for non-printing operations Provide functions nbcon_device_try_acquire() and nbcon_device_release() which will try to acquire the nbcon console ownership with NBCON_PRIO_NORMAL and mark it unsafe for handover/takeover. These functions are to be used together with the device-specific locking when performing non-printing activities on the console device. 
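For illustration, a driver-side non-printing operation would wrap its hardware access roughly like this (a sketch; migration is assumed to already be disabled by the driver's own locking, as the API requires):

#include <linux/console.h>
#include <linux/printk.h>

static void example_reconfigure_console_hw(struct console *con)
{
	if (!nbcon_device_try_acquire(con))
		return;	/* owned by a higher-priority printing context */

	/* ... reprogram the console hardware; the context is marked
	 * unsafe, so write_atomic() cannot interleave here ... */

	nbcon_device_release(con);
}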
They will allow synchronization against the atomic_write() callback which will be serialized, for higher priority contexts, only by acquiring the console context ownership. Pitfalls: The API requires to be called in a context with migration disabled because it uses per-CPU variables internally. The context is set unsafe for a takeover all the time. It guarantees full serialization against any atomic_write() caller except for the final flush in panic() which might try an unsafe takeover. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ include/linux/printk.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index aafe3121b74e..3706f944de46 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -322,6 +322,7 @@ struct nbcon_write_context { * * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print + * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -417,6 +418,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; + struct nbcon_context __private nbcon_device_ctxt; struct printk_buffers *pbufs; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index eee8e97da681..9687089f5ace 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -9,6 +9,8 @@ #include #include +struct console; + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -198,6 +200,8 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +extern bool nbcon_device_try_acquire(struct console *con); +extern void nbcon_device_release(struct console *con); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -280,6 +284,16 @@ static inline void printk_trigger_flush(void) static inline void console_try_replay_all(void) { } + +static inline bool nbcon_device_try_acquire(struct console *con) +{ + return false; +} + +static inline void nbcon_device_release(struct console *con) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From 4126f149c48b2ae757d11ea24675f7071ab22ebf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:40 +0206 Subject: serial: core: Acquire nbcon context in port->lock wrapper Currently the port->lock wrappers uart_port_lock(), uart_port_unlock() (and their variants) only lock/unlock the spin_lock. If the port is an nbcon console that has implemented the write_atomic() callback, the wrappers must also acquire/release the console context and mark the region as unsafe. This allows general port->lock synchronization to be synchronized against the nbcon write_atomic() callback. Note that __uart_port_using_nbcon() relies on the port->lock being held while a console is added and removed from the console list (i.e. all uart nbcon drivers *must* take the port->lock in their device_lock() callbacks). 
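For illustration, existing driver paths need no changes to benefit; an interrupt handler that already uses the wrappers is now transparently serialized against write_atomic() (a sketch with hypothetical my_uart names):

#include <linux/interrupt.h>
#include <linux/serial_core.h>

static irqreturn_t my_uart_irq(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	unsigned long flags;

	/* Also acquires the nbcon context when the port is registered as
	 * an nbcon console with a write_atomic() callback. */
	uart_port_lock_irqsave(port, &flags);
	/* ... drain the RX FIFO, kick TX ... */
	uart_port_unlock_irqrestore(port, flags);

	return IRQ_HANDLED;
}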
Signed-off-by: John Ogness Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-15-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/serial_core.h | 82 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2cf03ff2056a..4ab65874a850 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -625,6 +627,60 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) up->cons = con; __uart_port_unlock_irqrestore(up, flags); } + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_using_nbcon(struct uart_port *up) +{ + lockdep_assert_held_once(&up->lock); + + if (likely(!uart_console(up))) + return false; + + /* + * @up->cons is only modified under the port lock. Therefore it is + * certain that it cannot disappear here. + * + * @up->cons->node is added/removed from the console list under the + * port lock. Therefore it is certain that the registration status + * cannot change here, thus @up->cons->flags can be read directly. + */ + if (hlist_unhashed_lockless(&up->cons->node) || + !(up->cons->flags & CON_NBCON) || + !up->cons->write_atomic) { + return false; + } + + return true; +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return true; + + return nbcon_device_try_acquire(up->cons); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + while (!nbcon_device_try_acquire(up->cons)) + cpu_relax(); +} + +/* Only for internal port lock wrapper usage. 
*/ +static inline void __uart_port_nbcon_release(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + nbcon_device_release(up->cons); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure @@ -632,6 +688,7 @@ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -641,6 +698,7 @@ static inline void uart_port_lock(struct uart_port *up) static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -651,6 +709,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); + __uart_port_nbcon_acquire(up); } /** @@ -661,7 +720,15 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f */ static inline bool uart_port_trylock(struct uart_port *up) { - return spin_trylock(&up->lock); + if (!spin_trylock(&up->lock)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock(&up->lock); + return false; + } + + return true; } /** @@ -673,7 +740,15 @@ static inline bool uart_port_trylock(struct uart_port *up) */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { - return spin_trylock_irqsave(&up->lock, *flags); + if (!spin_trylock_irqsave(&up->lock, *flags)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock_irqrestore(&up->lock, *flags); + return false; + } + + return true; } /** @@ -682,6 +757,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long */ static inline void uart_port_unlock(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock(&up->lock); } @@ -691,6 +767,7 @@ static inline void uart_port_unlock(struct uart_port *up) */ static inline void uart_port_unlock_irq(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } @@ -701,6 +778,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { + __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } -- cgit v1.2.3 From 5dde3b7354133846079fa51e55e74ef90a836759 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:52 +0206 Subject: printk: nbcon: Add unsafe flushing on panic Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles using the write_atomic() callback and allowing unsafe hostile takeovers. Call this at the end of panic() as a final attempt to flush any pending messages. Note that legacy consoles use unsafe methods for flushing from the beginning of panic (see bust_spinlocks()). Therefore, systems using both legacy and nbcon consoles may still fail to see panic messages due to unsafe legacy console usage. 
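As a rough illustration of the intended call site (the kernel/panic.c change itself is outside this include/-only hunk, so this is a simplified sketch rather than the actual hunk):

  /* Final step of the panic path, heavily simplified. */
  static void example_panic_final_flush(void)
  {
          /*
           * Legacy consoles have already done their unsafe flushing (see
           * bust_spinlocks()).  As a last resort, push out anything still
           * pending on nbcon consoles, allowing unsafe hostile takeovers
           * of console ownership since the system is dying anyway.
           */
          nbcon_atomic_flush_unsafe();
  }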
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-27-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9687089f5ace..2e083f01f8a3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,6 +202,7 @@ void printk_trigger_flush(void); void console_try_replay_all(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); +void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -294,6 +295,10 @@ static inline void nbcon_device_release(struct console *con) { } +static inline void nbcon_atomic_flush_unsafe(void) +{ +} + #endif bool this_cpu_in_panic(void); -- cgit v1.2.3 From e35a8884270bae11196eedf3b0a5bf22619f11f2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 20 Aug 2024 08:35:55 +0206 Subject: printk: Coordinate direct printing in panic If legacy and nbcon consoles are registered and the nbcon consoles are allowed to flush (i.e. no boot consoles registered), the legacy consoles will no longer perform direct printing on the panic CPU until after the backtrace has been stored. This will give the safe nbcon consoles a chance to print the panic messages before allowing the unsafe legacy consoles to print. If no nbcon consoles are registered or they are not allowed to flush because boot consoles are registered, there is no change in behavior (i.e. legacy consoles will always attempt to print from the printk() caller context). Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-30-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 2e083f01f8a3..eca9bb2ee637 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -200,6 +200,7 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); @@ -286,6 +287,10 @@ static inline void console_try_replay_all(void) { } +static inline void printk_legacy_allow_panic_sync(void) +{ +} + static inline bool nbcon_device_try_acquire(struct console *con) { return false; -- cgit v1.2.3 From ecb5e1aa82c86642ec1eaafefd4e317dfba3a238 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Aug 2024 08:35:57 +0206 Subject: printk: nbcon: Implement emergency sections In emergency situations (something has gone wrong but the system continues to operate), usually important information (such as a backtrace) is generated via printk(). This information should be pushed out to the consoles ASAP. Add per-CPU emergency nesting tracking because an emergency can arise while in an emergency situation. Add functions to mark the beginning and end of emergency sections where the urgent messages are generated. Perform direct console flushing at the emergency priority if the current CPU is in an emergency state and it is safe to do so. Note that the emergency state is not system-wide. 
While one CPU is in an emergency state, another CPU may attempt to print console messages at normal priority. Also note that printk() already attempts to flush consoles in the caller context for normal priority. However, follow-up changes will introduce printing kthreads, in which case the normal priority printk() calls will offload to the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 3706f944de46..9a13f91b0c43 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con) hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK +extern void nbcon_cpu_emergency_enter(void); +extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); #else +static inline void nbcon_cpu_emergency_enter(void) { } +static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } -- cgit v1.2.3 From 3568affcddd68743e25aa3ec1647d9b82797757b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 13:29:30 -0700 Subject: soc: qcom: pmic_glink: Fix race during initialization As pointed out by Stephen Boyd it is possible that during initialization of the pmic_glink child drivers, the protection-domain notifier fires, and the associated work is scheduled, before the client registration returns and, as a result, before the local "client" pointer has been initialized. The outcome of this is a NULL pointer dereference as the "client" pointer is blindly dereferenced. Timeline provided by Stephen: CPU0 CPU1 ---- ---- ucsi->client = NULL; devm_pmic_glink_register_client() client->pdr_notify(client->priv, pg->client_state) pmic_glink_ucsi_pdr_notify() schedule_work(&ucsi->register_work) pmic_glink_ucsi_register() ucsi_register() pmic_glink_ucsi_read_version() pmic_glink_ucsi_read() pmic_glink_ucsi_read() pmic_glink_send(ucsi->client) ucsi->client = client // Too late! This code is identical across the altmode, battery manager and ucsi child drivers. Resolve this by splitting the allocation of the "client" object and the registration thereof into two operations. This only happens if the protection domain registry is populated at the time of registration, which by the introduction of commit '1ebcde047c54 ("soc: qcom: add pd-mapper implementation")' became much more likely. 
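The resulting usage pattern in a child driver probe path looks roughly like the sketch below; the child driver names and owner id are made up, the error handling assumes an ERR_PTR-style return, and only devm_pmic_glink_client_alloc() and pmic_glink_client_register() come from this patch:

  struct example_child {
          struct pmic_glink_client *client;
  };

  static void example_cb(const void *data, size_t len, void *priv) { /* ... */ }
  static void example_pdr_notify(void *priv, int state) { /* ... */ }

  static int example_child_probe(struct auxiliary_device *adev,
                                 const struct auxiliary_device_id *id)
  {
          struct example_child *child;

          child = devm_kzalloc(&adev->dev, sizeof(*child), GFP_KERNEL);
          if (!child)
                  return -ENOMEM;

          /* Step 1: allocate the client; no pdr/cb callbacks can fire yet. */
          child->client = devm_pmic_glink_client_alloc(&adev->dev, EXAMPLE_OWNER_ID,
                                                       example_cb, example_pdr_notify,
                                                       child);
          if (IS_ERR(child->client))
                  return PTR_ERR(child->client);

          /* ... finish initializing everything the callbacks depend on ... */

          /* Step 2: only now let notifications and messages be delivered. */
          pmic_glink_client_register(child->client);

          return 0;
  }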
Reported-by: Amit Pundir Closes: https://lore.kernel.org/all/CAMi1Hd2_a7TjA7J9ShrAbNOd_CoZ3D87twmO5t+nZxC9sX18tA@mail.gmail.com/ Reported-by: Johan Hovold Closes: https://lore.kernel.org/all/ZqiyLvP0gkBnuekL@hovoldconsulting.com/ Reported-by: Stephen Boyd Closes: https://lore.kernel.org/all/CAE-0n52JgfCBWiFQyQWPji8cq_rCsviBpW-m72YitgNfdaEhQg@mail.gmail.com/ Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Tested-by: Amit Pundir Reviewed-by: Johan Hovold Acked-by: Sebastian Reichel Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-1-eec53c750a04@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/pmic_glink.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/pmic_glink.h b/include/linux/soc/qcom/pmic_glink.h index fd124aa18c81..7cddf1027752 100644 --- a/include/linux/soc/qcom/pmic_glink.h +++ b/include/linux/soc/qcom/pmic_glink.h @@ -23,10 +23,11 @@ struct pmic_glink_hdr { int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len); -struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, - unsigned int id, - void (*cb)(const void *, size_t, void *), - void (*pdr)(void *, int), - void *priv); +struct pmic_glink_client *devm_pmic_glink_client_alloc(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv); +void pmic_glink_client_register(struct pmic_glink_client *client); #endif -- cgit v1.2.3 From d156263e247c334a8872578118e0709ed63c4806 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Aug 2024 06:37:39 -1000 Subject: workqueue: Fix another htmldocs build warning Fix htmldocs build warning introduced by 9b59a85a84dc ("workqueue: Don't call va_start / va_end twice"). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Matthew Brost --- include/linux/workqueue.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f3cc15940b4d..59c2695e12e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map - * @...: args for @fmt + * @args: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a @@ -544,7 +544,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) 
\ - alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args) + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ + 1, lockdep_map, ##args) #endif /** -- cgit v1.2.3 From 496ddd19a0fad22f250fc7a7b7a8000155418934 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 8 Aug 2024 16:22:28 -0700 Subject: bpf: extract iterator argument type and name validation logic Verifier enforces that all iterator structs are named `bpf_iter_` and that whenever iterator is passed to a kfunc it's passed as a valid PTR -> STRUCT chain (with potentially const modifiers in between). We'll need this check for upcoming changes, so instead of duplicating the logic, extract it into a helper function. Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240808232230.2848712-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index cffb43133c68..b8a583194c4a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -580,6 +580,7 @@ bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -654,6 +655,10 @@ static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, { return false; } +static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + return -EOPNOTSUPP; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) -- cgit v1.2.3 From 5151fa35ae5979821d091b80096b4c790b187bac Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:28 +0300 Subject: drm/fourcc: define Intel Xe2 related tile4 ccs modifiers Add Tile4 type ccs modifiers to indicate presence of compression on Xe2. Here is defined I915_FORMAT_MOD_4_TILED_LNL_CCS which is meant for integrated graphics with igpu related limitations Here is also defined I915_FORMAT_MOD_4_TILED_BMG_CCS which is meant for discrete graphics with dgpu related limitations Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Lucas De Marchi Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-3-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/drm_fourcc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2d84a8052b15..78abd819fd62 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -702,6 +702,31 @@ extern "C" { */ #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. 
The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.2.3 From 74b1e94e94ea369923e5656eeb10b26b16591327 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Aug 2024 13:51:19 -0700 Subject: net: repack struct netdev_queue Adding the NAPI pointer to struct netdev_queue made it grow into another cacheline, even though there was 44 bytes of padding available. The struct was historically grouped as follows: /* read-mostly stuff (align) */ /* ... random control path fields ... */ /* write-mostly stuff (align) */ /* ... 40 byte hole ... */ /* struct dql (align) */ It seems that people want to add control path fields after the read only fields. struct dql looks pretty innocent but it forces its own alignment and nothing indicates that there is a lot of empty space above it. Move dql above the xmit_lock. This shifts the empty space to the end of the struct rather than in the middle of it. Move two example fields there to set an example. Hopefully people will now add new fields at the end of the struct. A lot of the read-only stuff is also control path-only, but if we move it all we'll have another hole in the middle. 
Before: /* size: 384, cachelines: 6, members: 16 */ /* sum members: 284, holes: 3, sum holes: 100 */ After: /* size: 320, cachelines: 5, members: 16 */ /* sum members: 284, holes: 1, sum holes: 8 */ Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240820205119.1321322-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ef3eaa23f4b..614ec5d3d75b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,9 +644,6 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; -#endif -#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) - int numa_node; #endif unsigned long tx_maxrate; /* @@ -660,13 +657,13 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * Readers and writers must hold RTNL - */ - struct napi_struct *napi; + /* * write-mostly part */ +#ifdef CONFIG_BQL + struct dql dql; +#endif spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* @@ -676,8 +673,16 @@ struct netdev_queue { unsigned long state; -#ifdef CONFIG_BQL - struct dql dql; +/* + * slow- / control-path part + */ + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; + +#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) + int numa_node; #endif } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From ba0fb44aed47693cc2482427f63ba6cd19051327 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sun, 11 Aug 2024 10:09:35 +0300 Subject: dma-mapping: replace zone_dma_bits by zone_dma_limit The hardware DMA limit might not be power of 2. When RAM range starts above 0, say 4GB, DMA limit of 30 bits should end at 5GB. A single high bit can not encode this limit. Use a plain address for the DMA zone limit instead. Since the DMA zone can now potentially span beyond 4GB physical limit of DMA32, make sure to use DMA zone for GFP_DMA32 allocations in that case. Signed-off-by: Catalin Marinas Co-developed-by: Baruch Siach Signed-off-by: Baruch Siach Reviewed-by: Catalin Marinas Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index edbe13d00776..d7e30d4f7503 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -12,7 +12,7 @@ #include #include -extern unsigned int zone_dma_bits; +extern u64 zone_dma_limit; /* * Record the mapping of CPU physical to DMA addresses for a given region. -- cgit v1.2.3 From b5c58b2fdc427e7958412ecb2de2804a1f7c1572 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:49 +0300 Subject: dma-mapping: direct calls for dma-iommu Directly call into dma-iommu just like we have been doing for dma-direct for a while. This avoids the indirect call overhead for IOMMU ops and removes the need to have DMA ops entirely for many common configurations. 
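The resulting dispatch in the dma-mapping core then looks roughly like the sketch below; the use_dma_iommu() helper name and the surrounding simplifications are assumptions here, only the dev->dma_iommu bit and iommu_dma_map_page() are taken from this series:

  #ifdef CONFIG_IOMMU_DMA
  static inline bool use_dma_iommu(struct device *dev)
  {
          return dev->dma_iommu;
  }
  #else
  static inline bool use_dma_iommu(struct device *dev)
  {
          return false;
  }
  #endif

  /* Inside a dma_map_page_attrs()-style wrapper, heavily simplified: */
  if (use_dma_iommu(dev))
          addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); /* direct call  */
  else
          addr = ops->map_page(dev, page, offset, size, dir, attrs);      /* indirect call */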
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/device.h | 5 ++ include/linux/dma-map-ops.h | 13 ---- include/linux/iommu-dma.h | 147 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 include/linux/iommu-dma.h (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 34eb20f5966f..1c5280d28bc3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -707,6 +707,8 @@ struct device_physical_location { * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -822,6 +824,9 @@ struct device { #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif }; /** diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 02a1c825896b..077b15c93bb8 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,20 +13,7 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is - * coherent and it's not an SWIOTLB buffer. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) -#define DMA_F_CAN_SKIP_SYNC (1 << 1) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 000000000000..d30a58bf00fd --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * + * DMA operations that map physical memory through IOMMU. 
+ */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include + +#ifdef CONFIG_IOMMU_DMA +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +#else +static inline dma_addr_t iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return -EINVAL; +} +static inline void iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -EINVAL; +} +static inline int iommu_dma_get_sgtable(struct device *dev, + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + 
return -EINVAL; +} +static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + return 0; +} +static inline size_t iommu_dma_opt_mapping_size(void) +{ + return 0; +} +static inline size_t iommu_dma_max_mapping_size(struct device *dev) +{ + return 0; +} +static inline void iommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} +static inline dma_addr_t iommu_dma_map_resource(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_resource(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline struct sg_table * +iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* _LINUX_IOMMU_DMA_H */ -- cgit v1.2.3 From 273f8c142003dfd874d45b1a60965809e95ccd50 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sat, 17 Aug 2024 15:18:18 +0200 Subject: net: ipv6: ioam6: new feature tunsrc This patch provides a new feature (i.e., "tunsrc") for the tunnel (i.e., "encap") mode of ioam6. Just like seg6 already does, except it is attached to a route. The "tunsrc" is optional: when not provided (by default), the automatic resolution is applied. Using "tunsrc" when possible has a benefit: performance. See the comparison: - before (= "encap" mode): https://ibb.co/bNCzvf7 - after (= "encap" mode with "tunsrc"): https://ibb.co/PT8L6yq Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- include/uapi/linux/ioam6_iptunnel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 38f6a8fdfd34..8aef21e4a8c1 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -50,6 +50,12 @@ enum { IOAM6_IPTUNNEL_FREQ_K, /* u32 */ IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + /* Tunnel src address. + * For encap,auto modes. + * Optional (automatic if not provided). + */ + IOAM6_IPTUNNEL_SRC, /* struct in6_addr */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From a139c98f760efa1b6f0ee2f36ea6f62f04c8b20a Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Sat, 17 Aug 2024 10:28:51 -0400 Subject: USB: gadget: f_hid: Add GET_REPORT via userspace IOCTL While supporting GET_REPORT is a mandatory request per the HID specification the current implementation of the GET_REPORT request responds to the USB Host with an empty reply of the request length. However, some USB Hosts will request the contents of feature reports via the GET_REPORT request. 
In addition, some proprietary HID 'protocols' will expect different data, for the same report ID, to become available in the feature report by sending a preceding SET_REPORT to the USB Device that defines what data is to be presented when that feature report is subsequently retrieved via GET_REPORT (with a very fast < 5ms turn around between the SET_REPORT and the GET_REPORT). There are two other patch sets already submitted for adding GET_REPORT support. The first [1] allows for pre-priming a list of reports via IOCTLs which then allows the USB Host to perform the request, with no further userspace interaction possible during the GET_REPORT request. And another [2] which allows for a single report to be set up by userspace via IOCTL, which will be fetched and returned by the kernel for subsequent GET_REPORT requests by the USB Host, also with no further userspace interaction possible. This patch, while loosely based on both the patch sets, differs by allowing the option for userspace to respond to each GET_REPORT request by setting up a poll to notify userspace that a new GET_REPORT request has arrived. To support this, two extra IOCTLs are supplied. The first is used to retrieve the report ID of the GET_REPORT request (in the case of having non-zero report IDs in the HID descriptor). The second IOCTL allows for storing report responses in a list for responding to requests. The report responses are stored in a list (a response is either added if it does not exist or updated if it already exists). A flag (userspace_req) can be set to control whether subsequent requests notify userspace or not. Basic operation when a GET_REPORT request arrives from the USB Host: - If the report ID exists in the list and it is set for immediate return (i.e. userspace_req == false) then the response is sent immediately and userspace is not notified - If the report ID does not exist, or exists but is set to notify userspace (i.e. userspace_req == true), then notify userspace via poll: - If userspace responds, it either adds or updates the response in the list and the host is answered with the contents - If userspace does not respond within the fixed timeout (2500ms) but the report has been set previously, then send the 'old' report contents - If userspace does not respond within the fixed timeout (2500ms) and the report does not exist in the list then send an empty report Note that userspace could 'prime' the report list at any other time. While this patch allows for flexibility in how the system responds to requests, and therefore the HID 'protocols' that could be supported, a drawback is the time it takes to service the requests and therefore the maximum throughput that would be achievable. The USB HID Specification v1.11 itself states that GET_REPORT is not intended for periodic data polling, so this limitation is not severe. Testing on an iMX8M Nano Ultra Lite with a heavy multi-core CPU loading showed that userspace can typically respond to the GET_REPORT request within 1200ms - which is well within the 5000ms most operating systems seem to allow, and within the 2500ms set by this patch. 
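For illustration, a userspace handler built on the two new IOCTLs (defined in the uapi header added below) could look like the following sketch; error handling is trimmed and the poll event mask used to signal a pending GET_REPORT is an assumption here:

  #include <fcntl.h>
  #include <poll.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/usb/g_hid.h>

  static void serve_one_get_report(int fd)
  {
          struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
          struct usb_hidg_report rep;
          __u8 id = 0;

          poll(&pfd, 1, -1);                              /* wait for a request     */
          ioctl(fd, GADGET_HID_READ_GET_REPORT_ID, &id);  /* which report is asked? */

          memset(&rep, 0, sizeof(rep));
          rep.report_id = id;
          rep.userspace_req = 1;          /* keep notifying us on later requests */
          rep.length = 2;
          rep.data[0] = id;
          rep.data[1] = 0x42;             /* made-up payload */

          ioctl(fd, GADGET_HID_WRITE_GET_REPORT, &rep);
  }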
[1] https://lore.kernel.org/all/20220805070507.123151-2-sunil@amarulasolutions.com/ [2] https://lore.kernel.org/all/20220726005824.2817646-1-vi@endrift.com/ Signed-off-by: David Sands Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/20240817142850.1311460-2-crwulff@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/g_hid.h | 40 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/usb/gadgetfs.h | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/usb/g_hid.h (limited to 'include') diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h new file mode 100644 index 000000000000..b965092db476 --- /dev/null +++ b/include/uapi/linux/usb/g_hid.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef __UAPI_LINUX_USB_G_HID_H +#define __UAPI_LINUX_USB_G_HID_H + +#include + +/* Maximum HID report length for High-Speed USB (i.e. USB 2.0) */ +#define MAX_REPORT_LENGTH 64 + +/** + * struct usb_hidg_report - response to GET_REPORT + * @report_id: report ID that this is a response for + * @userspace_req: + * !0 this report is used for any pending GET_REPORT request + * but wait on userspace to issue a new report on future requests + * 0 this report is to be used for any future GET_REPORT requests + * @length: length of the report response + * @data: report response + * @padding: padding for 32/64 bit compatibility + * + * Structure used by GADGET_HID_WRITE_GET_REPORT ioctl on /dev/hidg*. + */ +struct usb_hidg_report { + __u8 report_id; + __u8 userspace_req; + __u16 length; + __u8 data[MAX_REPORT_LENGTH]; + __u8 padding[4]; +}; + +/* The 'g' code is used by gadgetfs and hid gadget ioctl requests. + * Don't add any colliding codes to either driver, and keep + * them in unique ranges. + */ + +#define GADGET_HID_READ_GET_REPORT_ID _IOR('g', 0x41, __u8) +#define GADGET_HID_WRITE_GET_REPORT _IOW('g', 0x42, struct usb_hidg_report) + +#endif /* __UAPI_LINUX_USB_G_HID_H */ diff --git a/include/uapi/linux/usb/gadgetfs.h b/include/uapi/linux/usb/gadgetfs.h index 835473910a49..9754822b2a40 100644 --- a/include/uapi/linux/usb/gadgetfs.h +++ b/include/uapi/linux/usb/gadgetfs.h @@ -62,7 +62,7 @@ struct usb_gadgetfs_event { }; -/* The 'g' code is also used by printer gadget ioctl requests. +/* The 'g' code is also used by printer and hid gadget ioctl requests. * Don't add any colliding codes to either driver, and keep * them in unique ranges (size 0x20 for now). */ -- cgit v1.2.3 From 1a9e3b0af301413210319e6946fb4b0b1ad71ccc Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 22 Aug 2024 14:32:02 +0800 Subject: ASoC: tas2781: mark const variables tas2563_dvc_table as __maybe_unused In case of tas2781, tas2563_dvc_table will be unused, so mark it as __maybe_unused. 
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240822063205.662-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2563-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/tas2563-tlv.h b/include/sound/tas2563-tlv.h index faa3e194f73b..bb269b21f460 100644 --- a/include/sound/tas2563-tlv.h +++ b/include/sound/tas2563-tlv.h @@ -18,7 +18,7 @@ static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); /* pow(10, db/20) * pow(2,30) */ -static const unsigned char tas2563_dvc_table[][4] = { +static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ -- cgit v1.2.3 From fd69dfe6789f4ed46d1fdb52e223cff83946d997 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 21 Aug 2024 02:13:56 +0000 Subject: ASoC: soc-pcm: Indicate warning if dpcm_playback/capture were used for availability limitation I have been wondering why DPCM needs a special flag (= dpcm_playback/capture) to be used. Below is the history of why it was added to ASoC. (A) In the beginning, there was no dpcm_xxx flag in ASoC. It checked channels_min for DPCM, the same as current non-DPCM. Let's name it the "validation check" here. if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { if (cpu_dai->driver->playback.channels_min) playback = 1; if (cpu_dai->driver->capture.channels_min) capture = 1; (B) commit 1e9de42f4324 ("ASoC: dpcm: Explicitly set BE DAI link supported stream directions") forced the use of the dpcm_xxx flags on DPCM. According to this commit log, this is because "Some BE dummy DAI doesn't set channels_min for playback/capture". But we don't know which DAI it is, nor why it can't/doesn't have channels_min. Let's name it "no_chan_DAI" here. According to the code and git-log, it is used as a DPCM-BE and is a CPU DAI. I think the correct solution would have been to set channels_min on the "no_chan_DAI" side, not to update the ASoC framework side. But everything is unclear today. if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { playback = rtd->dai_link->dpcm_playback; capture = rtd->dai_link->dpcm_capture; (C) commit 9b5db059366a ("ASoC: soc-pcm: dpcm: Only allow playback/capture if supported") checks channels_min (= validation check) again. This is because DPCM availability was handled by the dpcm_xxx flags at that time, but some Sound Cards set them even though the stream wasn't available. Clearly there's a contradiction here. I think the correct solution would have been to update the Sound Card side instead of the ASoC framework. The Sound Card side was later updated to handle this issue (commit 25612477d20b ("ASoC: soc-dai: set dai_link dpcm_ flags with a helper")) if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { ... playback = rtd->dai_link->dpcm_playback && snd_soc_dai_stream_valid(cpu_dai, ...); capture = rtd->dai_link->dpcm_capture && snd_soc_dai_stream_valid(cpu_dai, ...); This (C) patch should have broken "no_chan_DAI", which doesn't have channels_min, but there was no such report during these 4 years. The possible cases are as follows: - No one is using "no_chan_DAI" - "no_chan_DAI" no longer exists: it was removed? - "no_chan_DAI" no longer exists: it now has channels_min? Because of this history, the dpcm_xxx flags are unneeded today. But because we have been using them for 10 years since (B), they may have been used differently. 
For example, a DAI might be available for both playback and capture, but have only the dpcm_playback flag set; in that case the dpcm_xxx flag is being used as an availability limitation. We could use the playback_only flag instead in such a case, but it is very difficult to find every such DAI today. Let's add a grace period before removing the dpcm_playback/capture flags. This patch doesn't use the dpcm_xxx flags anymore, and indicates a warning suggesting the xxx_only flags if both playback and capture are available but only one of the dpcm_xxx flags is set and no xxx_only flag is used. Link: https://lore.kernel.org/r/87edaym2cg.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Kuninori Morimoto Tested-by: Jerome Brunet Link: https://patch.msgid.link/87seuyaahn.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index e844f6afc5b5..81ef380b2e06 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -815,6 +815,7 @@ struct snd_soc_dai_link { /* This DAI link can route to other DAI links at runtime (Frontend)*/ unsigned int dynamic:1; + /* REMOVE ME */ /* DPCM capture and Playback support */ unsigned int dpcm_capture:1; unsigned int dpcm_playback:1; -- cgit v1.2.3 From 12806510481497a01d01edd64d7bb53a4d9ec28d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 21 Aug 2024 02:14:02 +0000 Subject: ASoC: remove snd_soc_dai_link_set_capabilities() The dpcm_xxx flags are no longer needed. We would need to use the xxx_only flags instead where applicable, but snd_soc_dai_link_set_capabilities() sets dpcm_xxx for its users only when playback/capture is actually available, so converting dpcm_xxx to xxx_only is not needed. Just remove the helper. Signed-off-by: Kuninori Morimoto Tested-by: Jerome Brunet Link: https://patch.msgid.link/87r0aiaahh.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index ab4e109fe71d..0d1b215f24f4 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -219,7 +219,6 @@ void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num); bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int stream); -void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link); void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action); static inline void snd_soc_dai_activate(struct snd_soc_dai *dai, -- cgit v1.2.3 From 90dc34da02aca2fe529ae1ed1822e8fc2a0d9ebc Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 22 Aug 2024 15:55:35 +0100 Subject: ASoC: cs35l56: Make struct regmap_config const It's now possible to declare instances of struct regmap_config as const data. 
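For example, a driver-side declaration can now simply be (the field values here are illustrative, not the cs35l56 configuration):

  static const struct regmap_config example_codec_regmap = {
          .reg_bits = 32,
          .val_bits = 32,
          .cache_type = REGCACHE_MAPLE,
  };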
Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240822145535.336407-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a51acefa785f..94e8185c4795 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -282,9 +282,9 @@ static inline bool cs35l56_is_otp_register(unsigned int reg) return (reg >> 16) == 3; } -extern struct regmap_config cs35l56_regmap_i2c; -extern struct regmap_config cs35l56_regmap_spi; -extern struct regmap_config cs35l56_regmap_sdw; +extern const struct regmap_config cs35l56_regmap_i2c; +extern const struct regmap_config cs35l56_regmap_spi; +extern const struct regmap_config cs35l56_regmap_sdw; extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls; -- cgit v1.2.3 From ae010757a55b57c8b82628e8ea9b7da2269131d9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Aug 2024 01:41:07 -0700 Subject: bpf: rename nocsr -> bpf_fastcall in verifier Attribute used by LLVM implementation of the feature had been changed from no_caller_saved_registers to bpf_fastcall (see [1]). This commit replaces references to nocsr by references to bpf_fastcall to keep LLVM and Kernel parts in sync. [1] https://github.com/llvm/llvm-project/pull/105417 Acked-by: Yonghong Song Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240822084112.3257995-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++--- include/linux/bpf_verifier.h | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f0192c173ed8..00dc4dd28cbd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -808,12 +808,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; - /* set to true if helper follows contract for gcc/llvm - * attribute no_caller_saved_registers: + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ - bool allow_nocsr; + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5cea15c81b8a..634a302a39e3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -577,13 +577,13 @@ struct bpf_insn_aux_data { bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* true if STX or LDX instruction is a part of a spill/fill - * pattern for a no_caller_saved_registers call. + * pattern for a bpf_fastcall call. */ - u8 nocsr_pattern:1; + u8 fastcall_pattern:1; /* for CALL instructions, a number of spill/fill pairs in the - * no_caller_saved_registers pattern. + * bpf_fastcall pattern. */ - u8 nocsr_spills_num:3; + u8 fastcall_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -653,10 +653,10 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; - /* offsets in range [stack_depth .. nocsr_stack_off) - * are used for no_caller_saved_registers spills and fills. + /* offsets in range [stack_depth .. 
fastcall_stack_off) + * are used for bpf_fastcall spills and fills. */ - s16 nocsr_stack_off; + s16 fastcall_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -664,8 +664,8 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; - /* true if nocsr stack region is used by functions that can't be inlined */ - bool keep_nocsr_stack: 1; + /* true if bpf_fastcall stack region is used by functions that can't be inlined */ + bool keep_fastcall_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; -- cgit v1.2.3 From aa35e56a5217b86f9c05420c36c908baf3b2df5f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:00:19 +0200 Subject: thermal: core: Introduce .should_bind() thermal zone callback The current design of the code binding cooling devices to trip points in thermal zones is convoluted and hard to follow. Namely, a driver that registers a thermal zone can provide .bind() and .unbind() operations for it, which are required to call either thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), respectively, or thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), respectively, for every relevant trip point and the given cooling device. Moreover, if .bind() is provided and .unbind() is not, the cleanup necessary during the removal of a thermal zone or a cooling device may not be carried out. In other words, the core relies on the thermal zone owners to do the right thing, which is error prone and far from obvious, even though all of that is not really necessary. Specifically, if the core could ask the thermal zone owner, through a special thermal zone callback, whether or not a given cooling device should be bound to a given trip point in the given thermal zone, it might as well carry out all of the binding and unbinding by itself. In particular, the unbinding can be done automatically without involving the thermal zone owner at all because all of the thermal instances associated with a thermal zone or cooling device going away must be deleted regardless. Accordingly, introduce a new thermal zone operation, .should_bind(), that can be invoked by the thermal core for a given thermal zone, trip point and cooling device combination in order to check whether or not the cooling device should be bound to the trip point at hand. It takes an additional cooling_spec argument allowing the thermal zone owner to specify the highest and lowest cooling states of the cooling device and its weight for the given trip point binding. Make the thermal core use this operation, if present, in the absence of .bind() and .unbind(). Note that .should_bind() will be called under the thermal zone lock. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/9334403.CDJkKcVGEf@rjwysocki.net --- include/linux/thermal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ba6d74c11620..9c5d1f14224f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,11 +85,21 @@ struct thermal_trip { struct thermal_zone_device; +struct cooling_spec { + unsigned long upper; /* Highest cooling state */ + unsigned long lower; /* Lowest cooling state */ + unsigned int weight; /* Cooling device weight */ +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); + bool (*should_bind) (struct thermal_zone_device *, + const struct thermal_trip *, + struct thermal_cooling_device *, + struct cooling_spec *); int (*get_temp) (struct thermal_zone_device *, int *); int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, -- cgit v1.2.3 From 2fb85633641770d55b50c934db6b4fb02411a1fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:05:00 +0200 Subject: thermal: core: Unexport thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip() Since thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip() are only called locally in the thermal core now, they can be static, so change their definitions accordingly and drop their headers from the global thermal header file. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/3512161.QJadu78ljV@rjwysocki.net --- include/linux/thermal.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9c5d1f14224f..6599a26847f7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -247,18 +247,10 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); -int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower, - unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); -int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, -- cgit v1.2.3 From d51e783c17bab0c139bf78d6bd9d1f66673f7903 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:06 +0200 Subject: lsm: count the LSMs enabled at compile time These macros are a clever trick to determine a count of the number of LSMs that are enabled in the config to ascertain the maximum number of static calls that need to be configured per LSM hook. 
Without this one would need to generate static calls for the total number of LSMs in the kernel (even if they are not compiled) times the number of LSM hooks which ends up being quite wasteful. Tested-by: Guenter Roeck Suggested-by: Kui-Feng Lee Suggested-by: Andrii Nakryiko Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen Acked-by: Casey Schaufler Acked-by: Song Liu Acked-by: Andrii Nakryiko Nacked-by: Tetsuo Handa Signed-off-by: KP Singh [PM: added IPE to the count during merge] Signed-off-by: Paul Moore --- include/linux/args.h | 6 +-- include/linux/lsm_count.h | 135 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 include/linux/lsm_count.h (limited to 'include') diff --git a/include/linux/args.h b/include/linux/args.h index 8ff60a54eb7d..2e8e65d975c7 100644 --- a/include/linux/args.h +++ b/include/linux/args.h @@ -17,9 +17,9 @@ * that as _n. */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* Concatenate two parameters, but allow them to be expanded beforehand. */ #define __CONCAT(a, b) a ## b diff --git a/include/linux/lsm_count.h b/include/linux/lsm_count.h new file mode 100644 index 000000000000..16eb49761b25 --- /dev/null +++ b/include/linux/lsm_count.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __LINUX_LSM_COUNT_H +#define __LINUX_LSM_COUNT_H + +#include + +#ifdef CONFIG_SECURITY + +/* + * Macros to count the number of LSMs enabled in the kernel at compile time. + */ + +/* + * Capabilities is enabled when CONFIG_SECURITY is enabled. 
+ */ +#if IS_ENABLED(CONFIG_SECURITY) +#define CAPABILITIES_ENABLED 1, +#else +#define CAPABILITIES_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SELINUX) +#define SELINUX_ENABLED 1, +#else +#define SELINUX_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SMACK) +#define SMACK_ENABLED 1, +#else +#define SMACK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_APPARMOR) +#define APPARMOR_ENABLED 1, +#else +#define APPARMOR_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_TOMOYO) +#define TOMOYO_ENABLED 1, +#else +#define TOMOYO_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_YAMA) +#define YAMA_ENABLED 1, +#else +#define YAMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOADPIN) +#define LOADPIN_ENABLED 1, +#else +#define LOADPIN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) +#define LOCKDOWN_ENABLED 1, +#else +#define LOCKDOWN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SAFESETID) +#define SAFESETID_ENABLED 1, +#else +#define SAFESETID_ENABLED +#endif + +#if IS_ENABLED(CONFIG_BPF_LSM) +#define BPF_LSM_ENABLED 1, +#else +#define BPF_LSM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LANDLOCK) +#define LANDLOCK_ENABLED 1, +#else +#define LANDLOCK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_IMA) +#define IMA_ENABLED 1, +#else +#define IMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_EVM) +#define EVM_ENABLED 1, +#else +#define EVM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_IPE) +#define IPE_ENABLED 1, +#else +#define IPE_ENABLED +#endif + +/* + * There is a trailing comma that we need to be accounted for. This is done by + * using a skipped argument in __COUNT_LSMS + */ +#define __COUNT_LSMS(skipped_arg, args...) COUNT_ARGS(args...) +#define COUNT_LSMS(args...) __COUNT_LSMS(args) + +#define MAX_LSM_COUNT \ + COUNT_LSMS( \ + CAPABILITIES_ENABLED \ + SELINUX_ENABLED \ + SMACK_ENABLED \ + APPARMOR_ENABLED \ + TOMOYO_ENABLED \ + YAMA_ENABLED \ + LOADPIN_ENABLED \ + LOCKDOWN_ENABLED \ + SAFESETID_ENABLED \ + BPF_LSM_ENABLED \ + LANDLOCK_ENABLED \ + IMA_ENABLED \ + EVM_ENABLED \ + IPE_ENABLED) + +#else + +#define MAX_LSM_COUNT 0 + +#endif /* CONFIG_SECURITY */ + +#endif /* __LINUX_LSM_COUNT_H */ -- cgit v1.2.3 From 417c5643cd67a55f424b203b492082035d0236c3 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 16 Aug 2024 17:43:07 +0200 Subject: lsm: replace indirect LSM hook calls with static calls LSM hooks are currently invoked from a linked list as indirect calls which are invoked using retpolines as a mitigation for speculative attacks (Branch History / Target injection) and add extra overhead which is especially bad in kernel hot paths: security_file_ioctl: 0xff...0320 <+0>: endbr64 0xff...0324 <+4>: push %rbp 0xff...0325 <+5>: push %r15 0xff...0327 <+7>: push %r14 0xff...0329 <+9>: push %rbx 0xff...032a <+10>: mov %rdx,%rbx 0xff...032d <+13>: mov %esi,%ebp 0xff...032f <+15>: mov %rdi,%r14 0xff...0332 <+18>: mov $0xff...7030,%r15 0xff...0339 <+25>: mov (%r15),%r15 0xff...033c <+28>: test %r15,%r15 0xff...033f <+31>: je 0xff...0358 0xff...0341 <+33>: mov 0x18(%r15),%r11 0xff...0345 <+37>: mov %r14,%rdi 0xff...0348 <+40>: mov %ebp,%esi 0xff...034a <+42>: mov %rbx,%rdx 0xff...034d <+45>: call 0xff...2e0 <__x86_indirect_thunk_array+352> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Indirect calls that use retpolines leading to overhead, not just due to extra instruction but also branch misses. 
0xff...0352 <+50>: test %eax,%eax 0xff...0354 <+52>: je 0xff...0339 0xff...0356 <+54>: jmp 0xff...035a 0xff...0358 <+56>: xor %eax,%eax 0xff...035a <+58>: pop %rbx 0xff...035b <+59>: pop %r14 0xff...035d <+61>: pop %r15 0xff...035f <+63>: pop %rbp 0xff...0360 <+64>: jmp 0xff...47c4 <__x86_return_thunk> The indirect calls are not really needed as one knows the addresses of enabled LSM callbacks at boot time and only the order can possibly change at boot time with the lsm= kernel command line parameter. An array of static calls is defined per LSM hook and the static calls are updated at boot time once the order has been determined. With the hook now exposed as a static call, one can see that the retpolines are no longer there and the LSM callbacks are invoked directly: security_file_ioctl: 0xff...0ca0 <+0>: endbr64 0xff...0ca4 <+4>: nopl 0x0(%rax,%rax,1) 0xff...0ca9 <+9>: push %rbp 0xff...0caa <+10>: push %r14 0xff...0cac <+12>: push %rbx 0xff...0cad <+13>: mov %rdx,%rbx 0xff...0cb0 <+16>: mov %esi,%ebp 0xff...0cb2 <+18>: mov %rdi,%r14 0xff...0cb5 <+21>: jmp 0xff...0cc7 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Static key enabled for SELinux 0xffffffff818f0cb7 <+23>: jmp 0xff...0cde ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Static key enabled for BPF LSM. This is something that is changed to default to false to avoid the existing side effect issues of BPF LSM [1] in a subsequent patch. 0xff...0cb9 <+25>: xor %eax,%eax 0xff...0cbb <+27>: xchg %ax,%ax 0xff...0cbd <+29>: pop %rbx 0xff...0cbe <+30>: pop %r14 0xff...0cc0 <+32>: pop %rbp 0xff...0cc1 <+33>: cs jmp 0xff...0000 <__x86_return_thunk> 0xff...0cc7 <+39>: endbr64 0xff...0ccb <+43>: mov %r14,%rdi 0xff...0cce <+46>: mov %ebp,%esi 0xff...0cd0 <+48>: mov %rbx,%rdx 0xff...0cd3 <+51>: call 0xff...3230 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Direct call to SELinux. 0xff...0cd8 <+56>: test %eax,%eax 0xff...0cda <+58>: jne 0xff...0cbd 0xff...0cdc <+60>: jmp 0xff...0cb7 0xff...0cde <+62>: endbr64 0xff...0ce2 <+66>: mov %r14,%rdi 0xff...0ce5 <+69>: mov %ebp,%esi 0xff...0ce7 <+71>: mov %rbx,%rdx 0xff...0cea <+74>: call 0xff...e220 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Direct call to BPF LSM. 0xff...0cef <+79>: test %eax,%eax 0xff...0cf1 <+81>: jne 0xff...0cbd 0xff...0cf3 <+83>: jmp 0xff...0cb9 0xff...0cf5 <+85>: endbr64 0xff...0cf9 <+89>: mov %r14,%rdi 0xff...0cfc <+92>: mov %ebp,%esi 0xff...0cfe <+94>: mov %rbx,%rdx 0xff...0d01 <+97>: pop %rbx 0xff...0d02 <+98>: pop %r14 0xff...0d04 <+100>: pop %rbp 0xff...0d05 <+101>: ret 0xff...0d06 <+102>: int3 0xff...0d07 <+103>: int3 0xff...0d08 <+104>: int3 0xff...0d09 <+105>: int3 While this patch uses static_branch_unlikely indicating that an LSM hook is likely to be not present. In most cases this is still a better choice as even when an LSM with one hook is added, empty slots are created for all LSM hooks (especially when many LSMs that do not initialize most hooks are present on the system). There are some hooks that don't use the call_int_hook or call_void_hook. These hooks are updated to use a new macro called lsm_for_each_hook where the lsm_callback is directly invoked as an indirect call. Below are results of the relevant Unixbench system benchmarks with BPF LSM and SELinux enabled with default policies enabled with and without these patches. 
Benchmark Delta(%): (+ is better) ========================================================================== Execl Throughput +1.9356 File Write 1024 bufsize 2000 maxblocks +6.5953 Pipe Throughput +9.5499 Pipe-based Context Switching +3.0209 Process Creation +2.3246 Shell Scripts (1 concurrent) +1.4975 System Call Overhead +2.7815 System Benchmarks Index Score (Partial Only): +3.4859 In the best case, some syscalls like eventfd_create benefitted to about ~10%. Tested-by: Guenter Roeck Reviewed-by: Casey Schaufler Reviewed-by: Kees Cook Acked-by: Song Liu Acked-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 52 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 4687985b9175..090d1d3e19fe 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -30,19 +30,47 @@ #include #include #include +#include +#include +#include +#include union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK + void *lsm_func_addr; }; -struct security_hook_heads { - #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; - #include "lsm_hook_defs.h" - #undef LSM_HOOK +/* + * @key: static call key as defined by STATIC_CALL_KEY + * @trampoline: static call trampoline as defined by STATIC_CALL_TRAMP + * @hl: The security_hook_list as initialized by the owning LSM. + * @active: Enabled when the static call has an LSM hook associated. + */ +struct lsm_static_call { + struct static_call_key *key; + void *trampoline; + struct security_hook_list *hl; + /* this needs to be true or false based on what the key defaults to */ + struct static_key_false *active; } __randomize_layout; +/* + * Table of the static calls for each LSM hook. + * Once the LSMs are initialized, their callbacks will be copied to these + * tables such that the calls are filled backwards (from last to first). + * This way, we can jump directly to the first used static call, and execute + * all of them after. This essentially makes the entry point + * dynamic to adapt the number of static calls to the number of callbacks. + */ +struct lsm_static_calls_table { + #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + struct lsm_static_call NAME[MAX_LSM_COUNT]; + #include + #undef LSM_HOOK +} __packed __randomize_layout; + /** * struct lsm_id - Identify a Linux Security Module. * @lsm: name of the LSM, must be approved by the LSM maintainers @@ -58,10 +86,14 @@ struct lsm_id { /* * Security module hook list structure. * For use with generic list macros for common operations. + * + * struct security_hook_list - Contents of a cacheable, mappable object. + * @scalls: The beginning of the array of static calls assigned to this hook. + * @hook: The callback for the hook. + * @lsm: The name of the lsm that owns this hook. */ struct security_hook_list { - struct hlist_node list; - struct hlist_head *head; + struct lsm_static_call *scalls; union security_list_options hook; const struct lsm_id *lsmid; } __randomize_layout; @@ -98,8 +130,11 @@ struct lsm_blob_sizes { * care of the common case and reduces the amount of * text involved. 
*/ -#define LSM_HOOK_INIT(HEAD, HOOK) \ - { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } +#define LSM_HOOK_INIT(NAME, HOOK) \ + { \ + .scalls = static_calls_table.NAME, \ + .hook = { .NAME = HOOK } \ + } extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); @@ -134,7 +169,6 @@ struct lsm_info { /* DO NOT tamper with these variables outside of the LSM framework */ extern char *lsm_names; -extern struct security_hook_heads security_hook_heads; extern struct lsm_static_calls_table static_calls_table __ro_after_init; extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; -- cgit v1.2.3 From 6c65914a40d33e96ceedc757be0129139cdf7852 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 16 Aug 2024 11:54:23 -0700 Subject: Input: keypad-nomadik-ske - remove the driver The users of this driver were removed in 2013 in commit 28633c54bda6 ("ARM: ux500: Rip out keypad initialisation which is no longer used"). Remove the driver as well. Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/Zr-gX0dfN4te_8VG@google.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/keypad-nomadik-ske.h | 50 ------------------------ 1 file changed, 50 deletions(-) delete mode 100644 include/linux/platform_data/keypad-nomadik-ske.h (limited to 'include') diff --git a/include/linux/platform_data/keypad-nomadik-ske.h b/include/linux/platform_data/keypad-nomadik-ske.h deleted file mode 100644 index 7efabbca1dca..000000000000 --- a/include/linux/platform_data/keypad-nomadik-ske.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Naveen Kumar Gaddipati - * - * ux500 Scroll key and Keypad Encoder (SKE) header - */ - -#ifndef __SKE_H -#define __SKE_H - -#include - -/* register definitions for SKE peripheral */ -#define SKE_CR 0x00 -#define SKE_VAL0 0x04 -#define SKE_VAL1 0x08 -#define SKE_DBCR 0x0C -#define SKE_IMSC 0x10 -#define SKE_RIS 0x14 -#define SKE_MIS 0x18 -#define SKE_ICR 0x1C - -/* - * Keypad module - */ - -/** - * struct keypad_platform_data - structure for platform specific data - * @init: pointer to keypad init function - * @exit: pointer to keypad deinitialisation function - * @keymap_data: matrix scan code table for keycodes - * @krow: maximum number of rows - * @kcol: maximum number of columns - * @debounce_ms: platform specific debounce time - * @no_autorepeat: flag for auto repetition - * @wakeup_enable: allow waking up the system - */ -struct ske_keypad_platform_data { - int (*init)(void); - int (*exit)(void); - const struct matrix_keymap_data *keymap_data; - u8 krow; - u8 kcol; - u8 debounce_ms; - bool no_autorepeat; - bool wakeup_enable; -}; -#endif /*__SKE_KPD_H*/ -- cgit v1.2.3 From 5bc46b49c828a6dfaab80b71ecb63fe76a1096d2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Jul 2024 14:02:20 +0200 Subject: nvme-tcp: check for invalidated or revoked key key_lookup() will always return a key, even if that key is revoked or invalidated. So check for invalid keys before continuing. 
Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme-keyring.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index e10333d78dbb..19d2b256180f 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -12,7 +12,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); - +struct key *nvme_tls_key_lookup(key_serial_t key_id); #else static inline key_serial_t nvme_tls_psk_default(struct key *keyring, @@ -24,5 +24,9 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } +static inline struct key *nvme_tls_key_lookup(key_serial_t key_id) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ -- cgit v1.2.3 From 5ac86f0ed04bce41242167ffa12ad92038788a95 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jul 2024 16:15:55 -0700 Subject: virt: vbox: struct vmmdev_hgcm_pagelist: Replace 1-element array with flexible array Replace the deprecated[1] use of a 1-element array in struct vmmdev_hgcm_pagelist with a modern flexible array. As this is UAPI, we cannot trivially change the size of the struct, so use a union to retain the old first element's size, but switch "pages" to a flexible array. No binary differences are present after this conversion. Link: https://github.com/KSPP/linux/issues/79 [1] Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240710231555.work.406-kees@kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/vbox_vmmdev_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index f8a8d6b3c521..6073858d52a2 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -282,7 +282,10 @@ struct vmmdev_hgcm_pagelist { __u32 flags; /** VMMDEV_HGCM_F_PARM_*. */ __u16 offset_first_page; /** Data offset in the first page. */ __u16 page_count; /** Number of pages. */ - __u64 pages[1]; /** Page addresses. */ + union { + __u64 unused; /** Deprecated place-holder for first "pages" entry. */ + __DECLARE_FLEX_ARRAY(__u64, pages); /** Page addresses. */ + }; }; VMMDEV_ASSERT_SIZE(vmmdev_hgcm_pagelist, 4 + 2 + 2 + 8); -- cgit v1.2.3 From 559048d156ff3391c4b793779a824c9193e20442 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Aug 2024 14:43:44 -0700 Subject: string: Check for "nonstring" attribute on strscpy() arguments GCC already checks for arguments that are marked with the "nonstring"[1] attribute when used on standard C String API functions (e.g. strcpy). Gain this compile-time checking also for the kernel's primary string copying function, strscpy(). Note that Clang has neither "nonstring" nor __builtin_has_attribute(). 
Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute [1] Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240805214340.work.339-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler.h | 3 +++ include/linux/compiler_types.h | 7 +++++++ include/linux/string.h | 12 ++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2df665fa2964..ec55bcce4146 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -242,6 +242,9 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) BUILD_BUG_ON_ZERO(__annotated(p, nonstring)) + /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index f14c275950b5..1a957ea2f4fe 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -421,6 +421,13 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* Determine if an attribute has been applied to a variable. */ +#if __has_builtin(__builtin_has_attribute) +#define __annotated(var, attr) __builtin_has_attribute(var, attr) +#else +#define __annotated(var, attr) (false) +#endif + /* * Some versions of gcc do not mark 'asm goto' volatile: * diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..95b3fc308f4f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -76,12 +76,16 @@ ssize_t sized_strscpy(char *, const char *, size_t); * known size. */ #define __strscpy0(dst, src, ...) \ - sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy1(dst, src, size) \ + sized_strscpy(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) #define __strscpy_pad0(dst, src, ...) \ - sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy_pad1(dst, src, size) \ + sized_strscpy_pad(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) /** * strscpy - Copy a C-string into a sized buffer -- cgit v1.2.3 From 89835a58f5f54d52537709f2513fb91024e2d069 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 21 Aug 2024 08:54:11 +0300 Subject: scsi: ufs: Move UFS trace events to private header UFS trace events are called exclusively from the UFS core drivers. Make those events private to the core driver. The MAINTAINERS file does not need updating as the maintainership remains the same and the relevant directory is already covered. Reviewed-by: Bart Van Assche Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240821055411.3128159-1-avri.altman@wdc.com Acked-by: Bean Huo Signed-off-by: Martin K. 
Petersen --- include/trace/events/ufs.h | 399 --------------------------------------------- include/ufs/ufs.h | 4 +- 2 files changed, 2 insertions(+), 401 deletions(-) delete mode 100644 include/trace/events/ufs.h (limited to 'include') diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h deleted file mode 100644 index c4e209fbdfbb..000000000000 --- a/include/trace/events/ufs.h +++ /dev/null @@ -1,399 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ufs - -#if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_UFS_H - -#include - -#define str_opcode(opcode) \ - __print_symbolic(opcode, \ - { WRITE_16, "WRITE_16" }, \ - { WRITE_10, "WRITE_10" }, \ - { READ_16, "READ_16" }, \ - { READ_10, "READ_10" }, \ - { SYNCHRONIZE_CACHE, "SYNC" }, \ - { UNMAP, "UNMAP" }) - -#define UFS_LINK_STATES \ - EM(UIC_LINK_OFF_STATE, "UIC_LINK_OFF_STATE") \ - EM(UIC_LINK_ACTIVE_STATE, "UIC_LINK_ACTIVE_STATE") \ - EMe(UIC_LINK_HIBERN8_STATE, "UIC_LINK_HIBERN8_STATE") - -#define UFS_PWR_MODES \ - EM(UFS_ACTIVE_PWR_MODE, "UFS_ACTIVE_PWR_MODE") \ - EM(UFS_SLEEP_PWR_MODE, "UFS_SLEEP_PWR_MODE") \ - EM(UFS_POWERDOWN_PWR_MODE, "UFS_POWERDOWN_PWR_MODE") \ - EMe(UFS_DEEPSLEEP_PWR_MODE, "UFS_DEEPSLEEP_PWR_MODE") - -#define UFSCHD_CLK_GATING_STATES \ - EM(CLKS_OFF, "CLKS_OFF") \ - EM(CLKS_ON, "CLKS_ON") \ - EM(REQ_CLKS_OFF, "REQ_CLKS_OFF") \ - EMe(REQ_CLKS_ON, "REQ_CLKS_ON") - -#define UFS_CMD_TRACE_STRINGS \ - EM(UFS_CMD_SEND, "send_req") \ - EM(UFS_CMD_COMP, "complete_rsp") \ - EM(UFS_DEV_COMP, "dev_complete") \ - EM(UFS_QUERY_SEND, "query_send") \ - EM(UFS_QUERY_COMP, "query_complete") \ - EM(UFS_QUERY_ERR, "query_complete_err") \ - EM(UFS_TM_SEND, "tm_send") \ - EM(UFS_TM_COMP, "tm_complete") \ - EMe(UFS_TM_ERR, "tm_complete_err") - -#define UFS_CMD_TRACE_TSF_TYPES \ - EM(UFS_TSF_CDB, "CDB") \ - EM(UFS_TSF_OSF, "OSF") \ - EM(UFS_TSF_TM_INPUT, "TM_INPUT") \ - EMe(UFS_TSF_TM_OUTPUT, "TM_OUTPUT") - -/* Enums require being exported to userspace, for user tool parsing */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -UFS_LINK_STATES; -UFS_PWR_MODES; -UFSCHD_CLK_GATING_STATES; -UFS_CMD_TRACE_STRINGS -UFS_CMD_TRACE_TSF_TYPES - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. 
- */ -#undef EM -#undef EMe -#define EM(a, b) {a, b}, -#define EMe(a, b) {a, b} - -#define show_ufs_cmd_trace_str(str_t) \ - __print_symbolic(str_t, UFS_CMD_TRACE_STRINGS) -#define show_ufs_cmd_trace_tsf(tsf) \ - __print_symbolic(tsf, UFS_CMD_TRACE_TSF_TYPES) - -TRACE_EVENT(ufshcd_clk_gating, - - TP_PROTO(const char *dev_name, int state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->state = state; - ), - - TP_printk("%s: gating state changed to %s", - __get_str(dev_name), - __print_symbolic(__entry->state, UFSCHD_CLK_GATING_STATES)) -); - -TRACE_EVENT(ufshcd_clk_scaling, - - TP_PROTO(const char *dev_name, const char *state, const char *clk, - u32 prev_state, u32 curr_state), - - TP_ARGS(dev_name, state, clk, prev_state, curr_state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - __string(clk, clk) - __field(u32, prev_state) - __field(u32, curr_state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - __assign_str(clk); - __entry->prev_state = prev_state; - __entry->curr_state = curr_state; - ), - - TP_printk("%s: %s %s from %u to %u Hz", - __get_str(dev_name), __get_str(state), __get_str(clk), - __entry->prev_state, __entry->curr_state) -); - -TRACE_EVENT(ufshcd_auto_bkops_state, - - TP_PROTO(const char *dev_name, const char *state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - ), - - TP_printk("%s: auto bkops - %s", - __get_str(dev_name), __get_str(state)) -); - -DECLARE_EVENT_CLASS(ufshcd_profiling_template, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - - TP_ARGS(dev_name, profile_info, time_us, err), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(profile_info, profile_info) - __field(s64, time_us) - __field(int, err) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(profile_info); - __entry->time_us = time_us; - __entry->err = err; - ), - - TP_printk("%s: %s: took %lld usecs, err %d", - __get_str(dev_name), __get_str(profile_info), - __entry->time_us, __entry->err) -); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_hibern8, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_gating, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_scaling, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DECLARE_EVENT_CLASS(ufshcd_template, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - - TP_ARGS(dev_name, err, usecs, dev_state, link_state), - - TP_STRUCT__entry( - __field(s64, usecs) - __field(int, err) - __string(dev_name, dev_name) - __field(int, dev_state) - __field(int, link_state) - ), - - TP_fast_assign( - __entry->usecs = usecs; - __entry->err = err; - __assign_str(dev_name); - __entry->dev_state = dev_state; - __entry->link_state = link_state; - ), - - TP_printk( - "%s: took %lld usecs, dev_state: %s, link_state: %s, err %d", - __get_str(dev_name), - __entry->usecs, - 
__print_symbolic(__entry->dev_state, UFS_PWR_MODES), - __print_symbolic(__entry->link_state, UFS_LINK_STATES), - __entry->err - ) -); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_init, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -TRACE_EVENT(ufshcd_command, - TP_PROTO(struct scsi_device *sdev, enum ufs_trace_str_t str_t, - unsigned int tag, u32 doorbell, u32 hwq_id, int transfer_len, - u32 intr, u64 lba, u8 opcode, u8 group_id), - - TP_ARGS(sdev, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, - opcode, group_id), - - TP_STRUCT__entry( - __field(struct scsi_device *, sdev) - __field(enum ufs_trace_str_t, str_t) - __field(unsigned int, tag) - __field(u32, doorbell) - __field(u32, hwq_id) - __field(u32, intr) - __field(u64, lba) - __field(int, transfer_len) - __field(u8, opcode) - __field(u8, group_id) - ), - - TP_fast_assign( - __entry->sdev = sdev; - __entry->str_t = str_t; - __entry->tag = tag; - __entry->doorbell = doorbell; - __entry->hwq_id = hwq_id; - __entry->intr = intr; - __entry->lba = lba; - __entry->transfer_len = transfer_len; - __entry->opcode = opcode; - __entry->group_id = group_id; - ), - - TP_printk( - "%s: %s: tag: %u, DB: 0x%x, size: %d, IS: %u, LBA: %llu, opcode: 0x%x (%s), group_id: 0x%x, hwq_id: %d", - show_ufs_cmd_trace_str(__entry->str_t), - dev_name(&__entry->sdev->sdev_dev), __entry->tag, - __entry->doorbell, __entry->transfer_len, __entry->intr, - __entry->lba, (u32)__entry->opcode, str_opcode(__entry->opcode), - (u32)__entry->group_id, __entry->hwq_id - ) -); - -TRACE_EVENT(ufshcd_uic_command, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, u32 cmd, - u32 arg1, u32 arg2, u32 arg3), - - TP_ARGS(dev_name, str_t, cmd, arg1, arg2, arg3), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __field(u32, cmd) - __field(u32, arg1) - __field(u32, 
arg2) - __field(u32, arg3) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - __entry->cmd = cmd; - __entry->arg1 = arg1; - __entry->arg2 = arg2; - __entry->arg3 = arg3; - ), - - TP_printk( - "%s: %s: cmd: 0x%x, arg1: 0x%x, arg2: 0x%x, arg3: 0x%x", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __entry->cmd, __entry->arg1, __entry->arg2, __entry->arg3 - ) -); - -TRACE_EVENT(ufshcd_upiu, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, void *hdr, - void *tsf, enum ufs_trace_tsf_t tsf_t), - - TP_ARGS(dev_name, str_t, hdr, tsf, tsf_t), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __array(unsigned char, hdr, 12) - __array(unsigned char, tsf, 16) - __field(enum ufs_trace_tsf_t, tsf_t) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - memcpy(__entry->hdr, hdr, sizeof(__entry->hdr)); - memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); - __entry->tsf_t = tsf_t; - ), - - TP_printk( - "%s: %s: HDR:%s, %s:%s", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __print_hex(__entry->hdr, sizeof(__entry->hdr)), - show_ufs_cmd_trace_tsf(__entry->tsf_t), - __print_hex(__entry->tsf, sizeof(__entry->tsf)) - ) -); - -TRACE_EVENT(ufshcd_exception_event, - - TP_PROTO(const char *dev_name, u16 status), - - TP_ARGS(dev_name, status), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(u16, status) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->status = status; - ), - - TP_printk("%s: status 0x%x", - __get_str(dev_name), __entry->status - ) -); - -#endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ - -/* This part must be outside protection */ -#include diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 853e95957c31..e594abe5d05f 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -597,7 +597,7 @@ struct ufs_dev_info { }; /* - * This enum is used in string mapping in include/trace/events/ufs.h. + * This enum is used in string mapping in ufs_trace.h. */ enum ufs_trace_str_t { UFS_CMD_SEND, UFS_CMD_COMP, UFS_DEV_COMP, @@ -607,7 +607,7 @@ enum ufs_trace_str_t { /* * Transaction Specific Fields (TSF) type in the UPIU package, this enum is - * used in include/trace/events/ufs.h for UFS command trace. + * used in ufs_trace.h for UFS command trace. */ enum ufs_trace_tsf_t { UFS_TSF_CDB, UFS_TSF_OSF, UFS_TSF_TM_INPUT, UFS_TSF_TM_OUTPUT -- cgit v1.2.3 From d77381c2f62a557f630a64280ff09675128be363 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:10 -0700 Subject: scsi: fcoe: Simplify alloc_ordered_workqueue() invocations Let alloc_ordered_workqueue() format the workqueue name instead of calling snprintf() explicitly. Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-7-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- include/scsi/fcoe_sysfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/fcoe_sysfs.h b/include/scsi/fcoe_sysfs.h index 4b1216de3f22..2b28a05e492b 100644 --- a/include/scsi/fcoe_sysfs.h +++ b/include/scsi/fcoe_sysfs.h @@ -50,9 +50,7 @@ struct fcoe_ctlr_device { struct fcoe_sysfs_function_template *f; struct list_head fcfs; - char work_q_name[20]; struct workqueue_struct *work_q; - char devloss_work_q_name[20]; struct workqueue_struct *devloss_work_q; struct mutex lock; -- cgit v1.2.3 From 06d53789761cad6bbbc3c00e8c96a631a41f4bf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:19 -0700 Subject: scsi: scsi_transport_fc: Simplify alloc_workqueue() invocations Let alloc_workqueue() format the workqueue name instead of calling snprintf() explicitly. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-16-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_fc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 4b884b8013e0..8e6c60090c62 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -575,9 +575,7 @@ struct fc_host_attrs { u16 npiv_vports_inuse; /* work queues for rport state manipulation */ - char work_q_name[20]; struct workqueue_struct *work_q; - char devloss_work_q_name[20]; struct workqueue_struct *devloss_work_q; /* bsg support */ @@ -654,12 +652,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->next_vport_number) #define fc_host_npiv_vports_inuse(x) \ (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse) -#define fc_host_work_q_name(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->work_q_name) #define fc_host_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->work_q) -#define fc_host_devloss_work_q_name(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) #define fc_host_dev_loss_tmo(x) \ -- cgit v1.2.3 From ba52850cb6b4db5f4ec4636c73d2ad85d0e9adba Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Aug 2024 12:59:22 -0700 Subject: scsi: core: Simplify an alloc_workqueue() invocation Let alloc_workqueue() format the workqueue name. Remove the work_q_name[] member from struct Scsi_Host because it is no longer used by any SCSI driver nor by the SCSI core. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240822195944.654691-19-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 19a1c5c48935..2b4ab0369ffb 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -677,7 +677,6 @@ struct Scsi_Host { /* * Optional work queue to be utilized by the transport */ - char work_q_name[20]; struct workqueue_struct *work_q; /* -- cgit v1.2.3 From 54f2f78d6b9f1f90e91aaf5dbb34a6198f65fdfd Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 16:30:13 +0100 Subject: xfrm: Correct spelling in xfrm.h Correct spelling in xfrm.h. As reported by codespell. 
Signed-off-by: Simon Horman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54cef89f6c1e..f7244ac4fa08 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -79,7 +79,7 @@ policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations - to a dst_entry returned to requestor. + to a dst_entry returned to requester. dst -. xfrm .-> xfrm_state #1 |---. child .-> dst -. xfrm .-> xfrm_state #2 @@ -1016,7 +1016,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ - u32 if_id; /* interface identifyer */ + u32 if_id; /* interface identifier */ bool collect_md; }; -- cgit v1.2.3 From 997daa8de64ccbb4dc68d250510893597d485de4 Mon Sep 17 00:00:00 2001 From: Sunyeal Hong Date: Thu, 22 Aug 2024 08:26:49 +0900 Subject: dt-bindings: clock: add ExynosAuto v920 SoC CMU bindings Add dt-schema for ExynosAuto v920 SoC clock controller. Add device tree clock binding definitions for below CMU blocks. - CMU_TOP - CMU_PERIC0/1 - CMU_MISC - CMU_HSI0/1 Signed-off-by: Sunyeal Hong Link: https://lore.kernel.org/r/20240821232652.1077701-2-sunyeal.hong@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 191 +++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 include/dt-bindings/clock/samsung,exynosautov920.h (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h new file mode 100644 index 000000000000..c720f344b6bf --- /dev/null +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. + * Author: Sunyeal Hong + * + * Device Tree binding constants for ExynosAuto v920 clock controller. 
+ */ + +#ifndef _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H +#define _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H + +/* CMU_TOP */ +#define FOUT_SHARED0_PLL 1 +#define FOUT_SHARED1_PLL 2 +#define FOUT_SHARED2_PLL 3 +#define FOUT_SHARED3_PLL 4 +#define FOUT_SHARED4_PLL 5 +#define FOUT_SHARED5_PLL 6 +#define FOUT_MMC_PLL 7 + +/* MUX in CMU_TOP */ +#define MOUT_SHARED0_PLL 8 +#define MOUT_SHARED1_PLL 9 +#define MOUT_SHARED2_PLL 10 +#define MOUT_SHARED3_PLL 11 +#define MOUT_SHARED4_PLL 12 +#define MOUT_SHARED5_PLL 13 +#define MOUT_MMC_PLL 14 +#define MOUT_CLKCMU_CMU_BOOST 15 +#define MOUT_CLKCMU_CMU_CMUREF 16 +#define MOUT_CLKCMU_ACC_NOC 17 +#define MOUT_CLKCMU_ACC_ORB 18 +#define MOUT_CLKCMU_APM_NOC 19 +#define MOUT_CLKCMU_AUD_CPU 20 +#define MOUT_CLKCMU_AUD_NOC 21 +#define MOUT_CLKCMU_CPUCL0_SWITCH 22 +#define MOUT_CLKCMU_CPUCL0_CLUSTER 23 +#define MOUT_CLKCMU_CPUCL0_DBG 24 +#define MOUT_CLKCMU_CPUCL1_SWITCH 25 +#define MOUT_CLKCMU_CPUCL1_CLUSTER 26 +#define MOUT_CLKCMU_CPUCL2_SWITCH 27 +#define MOUT_CLKCMU_CPUCL2_CLUSTER 28 +#define MOUT_CLKCMU_DNC_NOC 29 +#define MOUT_CLKCMU_DPTX_NOC 30 +#define MOUT_CLKCMU_DPTX_DPGTC 31 +#define MOUT_CLKCMU_DPTX_DPOSC 32 +#define MOUT_CLKCMU_DPUB_NOC 33 +#define MOUT_CLKCMU_DPUB_DSIM 34 +#define MOUT_CLKCMU_DPUF0_NOC 35 +#define MOUT_CLKCMU_DPUF1_NOC 36 +#define MOUT_CLKCMU_DPUF2_NOC 37 +#define MOUT_CLKCMU_DSP_NOC 38 +#define MOUT_CLKCMU_G3D_SWITCH 39 +#define MOUT_CLKCMU_G3D_NOCP 40 +#define MOUT_CLKCMU_GNPU_NOC 41 +#define MOUT_CLKCMU_HSI0_NOC 42 +#define MOUT_CLKCMU_HSI1_NOC 43 +#define MOUT_CLKCMU_HSI1_USBDRD 44 +#define MOUT_CLKCMU_HSI1_MMC_CARD 45 +#define MOUT_CLKCMU_HSI2_NOC 46 +#define MOUT_CLKCMU_HSI2_NOC_UFS 47 +#define MOUT_CLKCMU_HSI2_UFS_EMBD 48 +#define MOUT_CLKCMU_HSI2_ETHERNET 49 +#define MOUT_CLKCMU_ISP_NOC 50 +#define MOUT_CLKCMU_M2M_NOC 51 +#define MOUT_CLKCMU_M2M_JPEG 52 +#define MOUT_CLKCMU_MFC_MFC 53 +#define MOUT_CLKCMU_MFC_WFD 54 +#define MOUT_CLKCMU_MFD_NOC 55 +#define MOUT_CLKCMU_MIF_SWITCH 56 +#define MOUT_CLKCMU_MIF_NOCP 57 +#define MOUT_CLKCMU_MISC_NOC 58 +#define MOUT_CLKCMU_NOCL0_NOC 59 +#define MOUT_CLKCMU_NOCL1_NOC 60 +#define MOUT_CLKCMU_NOCL2_NOC 61 +#define MOUT_CLKCMU_PERIC0_NOC 62 +#define MOUT_CLKCMU_PERIC0_IP 63 +#define MOUT_CLKCMU_PERIC1_NOC 64 +#define MOUT_CLKCMU_PERIC1_IP 65 +#define MOUT_CLKCMU_SDMA_NOC 66 +#define MOUT_CLKCMU_SNW_NOC 67 +#define MOUT_CLKCMU_SSP_NOC 68 +#define MOUT_CLKCMU_TAA_NOC 69 + +/* DIV in CMU_TOP */ +#define DOUT_SHARED0_DIV1 70 +#define DOUT_SHARED0_DIV2 71 +#define DOUT_SHARED0_DIV3 72 +#define DOUT_SHARED0_DIV4 73 +#define DOUT_SHARED1_DIV1 74 +#define DOUT_SHARED1_DIV2 75 +#define DOUT_SHARED1_DIV3 76 +#define DOUT_SHARED1_DIV4 77 +#define DOUT_SHARED2_DIV1 78 +#define DOUT_SHARED2_DIV2 79 +#define DOUT_SHARED2_DIV3 80 +#define DOUT_SHARED2_DIV4 81 +#define DOUT_SHARED3_DIV1 82 +#define DOUT_SHARED3_DIV2 83 +#define DOUT_SHARED3_DIV3 84 +#define DOUT_SHARED3_DIV4 85 +#define DOUT_SHARED4_DIV1 86 +#define DOUT_SHARED4_DIV2 87 +#define DOUT_SHARED4_DIV3 88 +#define DOUT_SHARED4_DIV4 89 +#define DOUT_SHARED5_DIV1 90 +#define DOUT_SHARED5_DIV2 91 +#define DOUT_SHARED5_DIV3 92 +#define DOUT_SHARED5_DIV4 93 +#define DOUT_CLKCMU_CMU_BOOST 94 +#define DOUT_CLKCMU_ACC_NOC 95 +#define DOUT_CLKCMU_ACC_ORB 96 +#define DOUT_CLKCMU_APM_NOC 97 +#define DOUT_CLKCMU_AUD_CPU 98 +#define DOUT_CLKCMU_AUD_NOC 99 +#define DOUT_CLKCMU_CPUCL0_SWITCH 100 +#define DOUT_CLKCMU_CPUCL0_CLUSTER 101 +#define DOUT_CLKCMU_CPUCL0_DBG 102 +#define DOUT_CLKCMU_CPUCL1_SWITCH 103 +#define DOUT_CLKCMU_CPUCL1_CLUSTER 104 +#define 
DOUT_CLKCMU_CPUCL2_SWITCH 105 +#define DOUT_CLKCMU_CPUCL2_CLUSTER 106 +#define DOUT_CLKCMU_DNC_NOC 107 +#define DOUT_CLKCMU_DPTX_NOC 108 +#define DOUT_CLKCMU_DPTX_DPGTC 109 +#define DOUT_CLKCMU_DPTX_DPOSC 110 +#define DOUT_CLKCMU_DPUB_NOC 111 +#define DOUT_CLKCMU_DPUB_DSIM 112 +#define DOUT_CLKCMU_DPUF0_NOC 113 +#define DOUT_CLKCMU_DPUF1_NOC 114 +#define DOUT_CLKCMU_DPUF2_NOC 115 +#define DOUT_CLKCMU_DSP_NOC 116 +#define DOUT_CLKCMU_G3D_SWITCH 117 +#define DOUT_CLKCMU_G3D_NOCP 118 +#define DOUT_CLKCMU_GNPU_NOC 119 +#define DOUT_CLKCMU_HSI0_NOC 120 +#define DOUT_CLKCMU_HSI1_NOC 121 +#define DOUT_CLKCMU_HSI1_USBDRD 122 +#define DOUT_CLKCMU_HSI1_MMC_CARD 123 +#define DOUT_CLKCMU_HSI2_NOC 124 +#define DOUT_CLKCMU_HSI2_NOC_UFS 125 +#define DOUT_CLKCMU_HSI2_UFS_EMBD 126 +#define DOUT_CLKCMU_HSI2_ETHERNET 127 +#define DOUT_CLKCMU_ISP_NOC 128 +#define DOUT_CLKCMU_M2M_NOC 129 +#define DOUT_CLKCMU_M2M_JPEG 130 +#define DOUT_CLKCMU_MFC_MFC 131 +#define DOUT_CLKCMU_MFC_WFD 132 +#define DOUT_CLKCMU_MFD_NOC 133 +#define DOUT_CLKCMU_MIF_NOCP 134 +#define DOUT_CLKCMU_MISC_NOC 135 +#define DOUT_CLKCMU_NOCL0_NOC 136 +#define DOUT_CLKCMU_NOCL1_NOC 137 +#define DOUT_CLKCMU_NOCL2_NOC 138 +#define DOUT_CLKCMU_PERIC0_NOC 139 +#define DOUT_CLKCMU_PERIC0_IP 140 +#define DOUT_CLKCMU_PERIC1_NOC 141 +#define DOUT_CLKCMU_PERIC1_IP 142 +#define DOUT_CLKCMU_SDMA_NOC 143 +#define DOUT_CLKCMU_SNW_NOC 144 +#define DOUT_CLKCMU_SSP_NOC 145 +#define DOUT_CLKCMU_TAA_NOC 146 + +/* CMU_PERIC0 */ +#define CLK_MOUT_PERIC0_IP_USER 1 +#define CLK_MOUT_PERIC0_NOC_USER 2 +#define CLK_MOUT_PERIC0_USI00_USI 3 +#define CLK_MOUT_PERIC0_USI01_USI 4 +#define CLK_MOUT_PERIC0_USI02_USI 5 +#define CLK_MOUT_PERIC0_USI03_USI 6 +#define CLK_MOUT_PERIC0_USI04_USI 7 +#define CLK_MOUT_PERIC0_USI05_USI 8 +#define CLK_MOUT_PERIC0_USI06_USI 9 +#define CLK_MOUT_PERIC0_USI07_USI 10 +#define CLK_MOUT_PERIC0_USI08_USI 11 +#define CLK_MOUT_PERIC0_USI_I2C 12 +#define CLK_MOUT_PERIC0_I3C 13 + +#define CLK_DOUT_PERIC0_USI00_USI 14 +#define CLK_DOUT_PERIC0_USI01_USI 15 +#define CLK_DOUT_PERIC0_USI02_USI 16 +#define CLK_DOUT_PERIC0_USI03_USI 17 +#define CLK_DOUT_PERIC0_USI04_USI 18 +#define CLK_DOUT_PERIC0_USI05_USI 19 +#define CLK_DOUT_PERIC0_USI06_USI 20 +#define CLK_DOUT_PERIC0_USI07_USI 21 +#define CLK_DOUT_PERIC0_USI08_USI 22 +#define CLK_DOUT_PERIC0_USI_I2C 23 +#define CLK_DOUT_PERIC0_I3C 24 + +#endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */ -- cgit v1.2.3 From b58b133e680b20d219940e0fdb6f6132c2b60f38 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Fri, 16 Aug 2024 10:49:06 +0000 Subject: iommu: Handle iommu faults for a bad iopf setup The iommu_report_device_fault function was updated to return void while assuming that drivers only need to call iommu_report_device_fault() for reporting an iopf. This implementation causes following problems: 1. The drivers rely on the core code to call it's page_reponse, however, when a fault is received and no fault capable domain is attached / iopf_param is NULL, the ops->page_response is NOT called causing the device to stall in case the fault type was PAGE_REQ. 2. The arm_smmu_v3 driver relies on the returned value to log errors returning void from iommu_report_device_fault causes these events to be missed while logging. Modify the iommu_report_device_fault function to return -EINVAL for cases where no fault capable domain is attached or iopf_param was NULL and calls back to the driver (ops->page_response) in case the fault type was IOMMU_FAULT_PAGE_REQ. 
The returned value can be used by the drivers to log the fault/event as needed. Reported-by: Kunkun Jiang Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/ Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Jason Gunthorpe Signed-off-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240816104906.1010626-1-praan@google.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 04cbdae0052e..bd722f473635 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1563,7 +1563,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1601,9 +1601,10 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline void +static inline int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + return -ENODEV; } static inline void iopf_group_response(struct iopf_group *group, -- cgit v1.2.3 From 84429b675bcfd2a518ae167ee4661cdf7539aa7d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 22 Aug 2024 15:50:09 +0200 Subject: fs: Allow fine-grained control of folio sizes We need filesystems to be able to communicate acceptable folio sizes to the pagecache for a variety of uses (e.g. large block sizes). Support a range of folio sizes between order-0 and order-31. Signed-off-by: Matthew Wilcox (Oracle) Co-developed-by: Pankaj Raghav Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240822135018.1931258-2-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Darrick J. 
Wong Reviewed-by: Daniel Gomez Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 90 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9c7edb6422b..c60025bb584c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,14 +204,21 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_LARGE_FOLIO_SUPPORT = 6, - AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ - AS_STABLE_WRITES, /* must wait for writeback before modifying + AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ - AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping, - including to move the mapping */ + AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ + /* Bits 16-25 are used for FOLIO_ORDER */ + AS_FOLIO_ORDER_BITS = 5, + AS_FOLIO_ORDER_MIN = 16, + AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, }; +#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) +#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) +#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) + /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set @@ -367,9 +374,51 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) +/* + * mapping_set_folio_order_range() - Set the orders supported by a file. + * @mapping: The address space of the file. + * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). + * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). + * + * The filesystem should call this function in its inode constructor to + * indicate which base size (min) and maximum size (max) of folio the VFS + * can use to cache the contents of the file. This should only be used + * if the filesystem needs special handling of folio sizes (ie there is + * something the core cannot know). + * Do not tune it based on, eg, i_size. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_folio_order_range(struct address_space *mapping, + unsigned int min, + unsigned int max) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return; + + if (min > MAX_PAGECACHE_ORDER) + min = MAX_PAGECACHE_ORDER; + + if (max > MAX_PAGECACHE_ORDER) + max = MAX_PAGECACHE_ORDER; + + if (max < min) + max = min; + + mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | + (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); +} + +static inline void mapping_set_folio_min_order(struct address_space *mapping, + unsigned int min) +{ + mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); +} + /** * mapping_set_large_folios() - Indicate the file supports large folios. - * @mapping: The file. + * @mapping: The address space of the file. 
* * The filesystem should call this function in its inode constructor to * indicate that the VFS can use large folios to cache the contents of @@ -380,7 +429,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) */ static inline void mapping_set_large_folios(struct address_space *mapping) { - __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); +} + +static inline unsigned int +mapping_max_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; +} + +static inline unsigned int +mapping_min_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; } /* @@ -389,20 +454,17 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { - /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, "Anonymous mapping always supports large folio"); - return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + return mapping_max_folio_order(mapping) > 0; } /* Return the maximum folio size for this pagecache mapping, in bytes. */ -static inline size_t mapping_max_folio_size(struct address_space *mapping) +static inline size_t mapping_max_folio_size(const struct address_space *mapping) { - if (mapping_large_folio_support(mapping)) - return PAGE_SIZE << MAX_PAGECACHE_ORDER; - return PAGE_SIZE; + return PAGE_SIZE << mapping_max_folio_order(mapping); } static inline int filemap_nr_thps(struct address_space *mapping) -- cgit v1.2.3 From ab95d23bab220ef845c0d422f49452a475330eaf Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 22 Aug 2024 15:50:10 +0200 Subject: filemap: allocate mapping_min_order folios in the page cache filemap_create_folio() and do_read_cache_folio() were always allocating folio of order 0. __filemap_get_folio was trying to allocate higher order folios when fgp_flags had higher order hint set but it will default to order 0 folio if higher order memory allocation fails. Supporting mapping_min_order implies that we guarantee each folio in the page cache has at least an order of mapping_min_order. When adding new folios to the page cache we must also ensure the index used is aligned to the mapping_min_order as the page cache requires the index to be aligned to the order of the folio. Co-developed-by: Luis Chamberlain Signed-off-by: Luis Chamberlain Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240822135018.1931258-3-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Darrick J. 
Wong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Daniel Gomez Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c60025bb584c..4cc170949e9c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -448,6 +448,26 @@ mapping_min_folio_order(const struct address_space *mapping) return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; } +static inline unsigned long +mapping_min_folio_nrpages(struct address_space *mapping) +{ + return 1UL << mapping_min_folio_order(mapping); +} + +/** + * mapping_align_index() - Align index for this mapping. + * @mapping: The address_space. + * + * The index of a folio must be naturally aligned. If you are adding a + * new folio to the page cache and need to know what index to give it, + * call this function. + */ +static inline pgoff_t mapping_align_index(struct address_space *mapping, + pgoff_t index) +{ + return round_down(index, mapping_min_folio_nrpages(mapping)); +} + /* * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. -- cgit v1.2.3 From 3849687869092094003ba009dc00e2e0237a3b8a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:55 +0200 Subject: net: phy: Introduce ethernet link topology representation Link topologies containing multiple network PHYs attached to the same net_device can be found when using a PHY as a media converter for use with an SFP connector, on which an SFP transceiver containing a PHY can be used. With the current model, the transceiver's PHY can't be used for operations such as cable testing, timestamping, macsec offload, etc. The reason being that most of the logic for these configuration, coming from either ethtool netlink or ioctls tend to use netdev->phydev, which in multi-phy systems will reference the PHY closest to the MAC. Introduce a numbering scheme allowing to enumerate PHY devices that belong to any netdev, which can in turn allow userspace to take more precise decisions with regard to each PHY's configuration. The numbering is maintained per-netdev, in a phy_device_list. The numbering works similarly to a netdevice's ifindex, with identifiers that are only recycled once INT_MAX has been reached. This prevents races that could occur between PHY listing and SFP transceiver removal/insertion. The identifiers are assigned at phy_attach time, as the numbering depends on the netdevice the phy is attached to. The PHY index can be re-used for PHYs that are persistent. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +- include/linux/phy.h | 4 ++ include/linux/phy_link_topology.h | 82 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool.h | 16 ++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 include/linux/phy_link_topology.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614ec5d3d75b..289a6133769c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include #endif #include - #include #include #include @@ -81,6 +80,7 @@ struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; +struct phy_link_topology; typedef u32 xdp_features_t; @@ -1951,6 +1951,7 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one + * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2342,6 +2343,7 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif + struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b7d40d49129..3b0f4bf80650 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -554,6 +554,9 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. + * @phyindex: Unique id across the phy's parent tree of phys to address the PHY + * from userspace, similar to ifindex. A zero index means the PHY + * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -656,6 +659,7 @@ struct phy_device { struct device_link *devlink; + u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h new file mode 100644 index 000000000000..68a59e25821c --- /dev/null +++ b/include/linux/phy_link_topology.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PHY device list allow maintaining a list of PHY devices that are + * part of a netdevice's link topology. PHYs can for example be chained, + * as is the case when using a PHY that exposes an SFP module, on which an + * SFP transceiver that embeds a PHY is connected. + * + * This list can then be used by userspace to leverage individual PHY + * capabilities. 
+ */ +#ifndef __PHY_LINK_TOPOLOGY_H +#define __PHY_LINK_TOPOLOGY_H + +#include +#include + +struct xarray; +struct phy_device; +struct sfp_bus; + +struct phy_link_topology { + struct xarray phys; + u32 next_phy_index; +}; + +struct phy_device_node { + enum phy_upstream upstream_type; + + union { + struct net_device *netdev; + struct phy_device *phydev; + } upstream; + + struct sfp_bus *parent_sfp_bus; + + struct phy_device *phy; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream); + +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + + if (!topo) + return NULL; + + pdn = xa_load(&topo->phys, phyindex); + if (pdn) + return pdn->phy; + + return NULL; +} + +#else +static inline int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + return 0; +} + +static inline void phy_link_topo_del_phy(struct net_device *dev, + struct phy_device *phy) +{ +} + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + return NULL; +} +#endif + +#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4a0a6e703483..c405ed63acfa 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2533,4 +2533,20 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; + +/** + * enum phy_upstream - Represents the upstream component a given PHY device + * is connected to, as in what is on the other end of the MII bus. Most PHYs + * will be attached to an Ethernet MAC controller, but in some cases, there's + * an intermediate PHY used as a media-converter, which will driver another + * MII interface as its output. + * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, + * or ethernet controller) + * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) + */ +enum phy_upstream { + PHY_UPSTREAM_MAC, + PHY_UPSTREAM_PHY, +}; + #endif /* _UAPI_LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 4d76f115ab9121f4458330da962ae9ef5430e60b Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:56 +0200 Subject: net: sfp: pass the phy_device when disconnecting an sfp module's PHY Pass the phy_device as a parameter to the sfp upstream .disconnect_phy operation. This is preparatory work to help track phy devices across a net_device's link. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index b14be59550e3..54abb4d22b2e 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -550,7 +550,7 @@ struct sfp_upstream_ops { void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); - void (*disconnect_phy)(void *priv); + void (*disconnect_phy)(void *priv, struct phy_device *); }; #if IS_ENABLED(CONFIG_SFP) -- cgit v1.2.3 From b2db6f4ace72e71fa09b8d1354f8ac9854140d74 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:57 +0200 Subject: net: phy: add helpers to handle sfp phy connect/disconnect There are a few PHY drivers that can handle SFP modules through their sfp_upstream_ops. Introduce Phylib helpers to keep track of connected SFP PHYs in a netdevice's namespace, by adding the SFP PHY to the upstream PHY's netdev's namespace. By doing so, these SFP PHYs can be enumerated and exposed to users, which will be able to use their capabilities. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3b0f4bf80650..a98bc91a0cde 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1781,6 +1781,8 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); +int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); +void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); void phy_sfp_detach(void *upstream, struct sfp_bus *bus); int phy_sfp_probe(struct phy_device *phydev, -- cgit v1.2.3 From 0a2f7de0f3b96c4a30e56d2c79f8d812f89599a1 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:58 +0200 Subject: net: sfp: Add helper to return the SFP bus name Knowing the bus name is helpful when we want to expose the link topology to userspace, add a helper to return the SFP bus name. This call will always be made while holding the RTNL which ensures that the SFP driver won't unbind from the device. The returned pointer to the bus name will only be used while RTNL is held. Signed-off-by: Maxime Chevallier Suggested-by: "Russell King (Oracle)" Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 54abb4d22b2e..60c65cea74f6 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -576,6 +576,7 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); +const char *sfp_get_name(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -654,6 +655,11 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } + +static inline const char *sfp_get_name(struct sfp_bus *bus) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From c15e065b46dc4e19837275b826c1960d55564abd Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:59 +0200 Subject: net: ethtool: Allow passing a phy index for some commands Some netlink commands are target towards ethernet PHYs, to control some of their features. As there's several such commands, add the ability to pass a PHY index in the ethnl request, which will populate the generic ethnl_req_info with the passed phy_index. Add a helper that netlink command handlers need to use to grab the targeted PHY from the req_info. This helper needs to hold rtnl_lock() while interacting with the PHY, as it may be removed at any point. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9074fa309bd6..49d1f9220fde 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -134,6 +134,7 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, -- cgit v1.2.3 From 17194be4c8e1e82d8b484e58cdcb495c0714d1fd Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:10:01 +0200 Subject: net: ethtool: Introduce a command to list PHYs on an interface As we have the ability to track the PHYs connected to a net_device through the link_topology, we can expose this list to userspace. This allows userspace to use these identifiers for phy-specific commands and take the decision of which PHY to target by knowing the link topology. Add PHY_GET and PHY_DUMP, which can be a filtered DUMP operation to list devices on only one interface. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool_netlink.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 49d1f9220fde..45d8bcdea056 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -58,6 +58,7 @@ enum { ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -111,6 +112,8 @@ enum { ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, ETHTOOL_MSG_MODULE_FW_FLASH_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1055,6 +1058,22 @@ enum { ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) }; +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From c579286a514d88a3f0d3bdabfd4d88737b33cb17 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2024 18:31:33 +0200 Subject: thermal: core: Drop unused bind/unbind functions and callbacks There are no more callers of thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), so drop them along with all of the corresponding headers, code and documentation. Moreover, because the .bind() and .unbind() thermal zone callbacks would only be used when the above functions, respectively, were called, drop them as well along with all of the code related to them. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Acked-by: Huisong Li Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/4251116.1IzOArtZ34@rjwysocki.net --- include/linux/thermal.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6599a26847f7..25ea8fe2313e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,10 +92,6 @@ struct cooling_spec { }; struct thermal_zone_device_ops { - int (*bind) (struct thermal_zone_device *, - struct thermal_cooling_device *); - int (*unbind) (struct thermal_zone_device *, - struct thermal_cooling_device *); bool (*should_bind) (struct thermal_zone_device *, const struct thermal_trip *, struct thermal_cooling_device *, @@ -247,12 +243,6 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); -int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *, - unsigned long, unsigned long, - unsigned int); -int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -- cgit v1.2.3 From 4b570ac2eb54f66ff64f2864be6303b8d67cc7f9 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 22 Aug 2024 09:33:55 +0200 Subject: drm/rect: Add drm_rect_overlap() Check if two rectangles overlap. It's a bit similar to drm_rect_intersect() but this won't modify the rectangle. Simplifies a bit drm_panic. Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240822073852.562286-3-jfalempe@redhat.com --- include/drm/drm_rect.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 73fcb899a01d..46f09cf68458 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -238,6 +238,21 @@ static inline void drm_rect_fp_to_int(struct drm_rect *dst, drm_rect_height(src) >> 16); } +/** + * drm_rect_overlap - Check if two rectangles overlap + * @a: first rectangle + * @b: second rectangle + * + * RETURNS: + * %true if the rectangles overlap, %false otherwise. + */ +static inline bool drm_rect_overlap(const struct drm_rect *a, + const struct drm_rect *b) +{ + return (a->x2 > b->x1 && b->x2 > a->x1 && + a->y2 > b->y1 && b->y2 > a->y1); +} + bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, const struct drm_rect *clip); -- cgit v1.2.3 From 4ec2caab775606c4bf9d0f026f8942a0b33e2255 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 23 Aug 2024 00:41:25 +0300 Subject: media: v4l2-mc: Mark v4l2_pipeline_link_notify() as deprecated Commit b97213a41140 ("media: v4l2-mc: Make v4l2_pipeline_pm_{get,put} deprecated") marked the v4l2_pipeline_pm_get() and v4l2_pipeline_pm_put() functions as deprecated, but forgot to address the related v4l2_pipeline_link_notify() function similarly. Fix it. 
Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Link: https://lore.kernel.org/r/20240822214125.3161-1-laurent.pinchart+renesas@ideasonboard.com Signed-off-by: Laurent Pinchart --- include/media/v4l2-mc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-mc.h b/include/media/v4l2-mc.h index ed0a44b6eada..1837c9fd78cf 100644 --- a/include/media/v4l2-mc.h +++ b/include/media/v4l2-mc.h @@ -178,6 +178,9 @@ void v4l2_pipeline_pm_put(struct media_entity *entity); * @flags: New link flags that will be applied * @notification: The link's state change notification type (MEDIA_DEV_NOTIFY_*) * + * THIS FUNCTION IS DEPRECATED. DO NOT USE IN NEW DRIVERS. USE RUNTIME PM + * ON SUB-DEVICE DRIVERS INSTEAD. + * * React to link management on powered pipelines by updating the use count of * all entities in the source and sink sides of the link. Entities are powered * on or off accordingly. The use of this function should be paired -- cgit v1.2.3 From ccce71013406a0a3c81850dab940f07b112349d3 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 14 Aug 2024 14:21:18 +0200 Subject: mtd: rawnand: davinci: make platform_data private There are no longer any users of the platform data for davinci rawnand in board files. We can remove the public pdata headers and move the structures that are still used into the driver compilation unit while removing the rest. Signed-off-by: Bartosz Golaszewski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240814122120.13975-1-brgl@bgdev.pl --- include/linux/platform_data/mtd-davinci-aemif.h | 36 ---------- include/linux/platform_data/mtd-davinci.h | 88 ------------------------- 2 files changed, 124 deletions(-) delete mode 100644 include/linux/platform_data/mtd-davinci-aemif.h delete mode 100644 include/linux/platform_data/mtd-davinci.h (limited to 'include') diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h deleted file mode 100644 index a49826214a39..000000000000 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * TI DaVinci AEMIF support - * - * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ -#ifndef _MACH_DAVINCI_AEMIF_H -#define _MACH_DAVINCI_AEMIF_H - -#include - -#define NRCSR_OFFSET 0x00 -#define AWCCR_OFFSET 0x04 -#define A1CR_OFFSET 0x10 - -#define ACR_ASIZE_MASK 0x3 -#define ACR_EW_MASK BIT(30) -#define ACR_SS_MASK BIT(31) - -/* All timings in nanoseconds */ -struct davinci_aemif_timing { - u8 wsetup; - u8 wstrobe; - u8 whold; - - u8 rsetup; - u8 rstrobe; - u8 rhold; - - u8 ta; -}; - -#endif diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h deleted file mode 100644 index dd474dd44848..000000000000 --- a/include/linux/platform_data/mtd-davinci.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mach-davinci/nand.h - * - * Copyright © 2006 Texas Instruments. 
- * - * Ported to 2.6.23 Copyright © 2008 by - * Sander Huijsen - * Troy Kisky - * Dirk Behme - * - * -------------------------------------------------------------------------- - */ - -#ifndef __ARCH_ARM_DAVINCI_NAND_H -#define __ARCH_ARM_DAVINCI_NAND_H - -#include - -#define NANDFCR_OFFSET 0x60 -#define NANDFSR_OFFSET 0x64 -#define NANDF1ECC_OFFSET 0x70 - -/* 4-bit ECC syndrome registers */ -#define NAND_4BIT_ECC_LOAD_OFFSET 0xbc -#define NAND_4BIT_ECC1_OFFSET 0xc0 -#define NAND_4BIT_ECC2_OFFSET 0xc4 -#define NAND_4BIT_ECC3_OFFSET 0xc8 -#define NAND_4BIT_ECC4_OFFSET 0xcc -#define NAND_ERR_ADD1_OFFSET 0xd0 -#define NAND_ERR_ADD2_OFFSET 0xd4 -#define NAND_ERR_ERRVAL1_OFFSET 0xd8 -#define NAND_ERR_ERRVAL2_OFFSET 0xdc - -/* NOTE: boards don't need to use these address bits - * for ALE/CLE unless they support booting from NAND. - * They're used unless platform data overrides them. - */ -#define MASK_ALE 0x08 -#define MASK_CLE 0x10 - -struct davinci_nand_pdata { /* platform_data */ - uint32_t mask_ale; - uint32_t mask_cle; - - /* - * 0-indexed chip-select number of the asynchronous - * interface to which the NAND device has been connected. - * - * So, if you have NAND connected to CS3 of DA850, you - * will pass '1' here. Since the asynchronous interface - * on DA850 starts from CS2. - */ - uint32_t core_chipsel; - - /* for packages using two chipselects */ - uint32_t mask_chipsel; - - /* board's default static partition info */ - struct mtd_partition *parts; - unsigned nr_parts; - - /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!) - * soft == NAND_ECC_ENGINE_TYPE_SOFT - * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits - * - * All DaVinci-family chips support 1-bit hardware ECC. - * Newer ones also support 4-bit ECC, but are awkward - * using it with large page chips. - */ - enum nand_ecc_engine_type engine_type; - enum nand_ecc_placement ecc_placement; - u8 ecc_bits; - - /* e.g. NAND_BUSWIDTH_16 */ - unsigned options; - /* e.g. NAND_BBT_USE_FLASH */ - unsigned bbt_options; - - /* Main and mirror bbt descriptor overrides */ - struct nand_bbt_descr *bbt_td; - struct nand_bbt_descr *bbt_md; - - /* Access timings */ - struct davinci_aemif_timing *timing; -}; - -#endif /* __ARCH_ARM_DAVINCI_NAND_H */ -- cgit v1.2.3 From 8a48281cfa7f380707d42b649acda3ea36348697 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 15:42:01 +0800 Subject: PCI: Make pci_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the pci_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Link: https://lore.kernel.org/r/20240823074202.139265-1-kunwu.chan@linux.dev Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Signed-off-by: Bjorn Helgaas Cc: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..197292b336a5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1098,7 +1098,7 @@ enum pcie_bus_config_types { extern enum pcie_bus_config_types pcie_bus_config; -extern struct bus_type pci_bus_type; +extern const struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. 
*/ -- cgit v1.2.3 From 9246b487ab3c3b5993aae7552b7a4c541cc14a49 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 23 Aug 2024 17:57:08 +0800 Subject: PCI: Add function 0 DMA alias quirk for Glenfly Arise chip Add DMA support for audio function of Glenfly Arise chip, which uses Requester ID of function 0. Link: https://lore.kernel.org/r/CA2BBD087345B6D1+20240823095708.3237375-1-wangyuli@uniontech.com Signed-off-by: SiyuLi Signed-off-by: WangYuli [bhelgaas: lower-case hex to match local code, drop unused Device IDs] Signed-off-by: Bjorn Helgaas Reviewed-by: Takashi Iwai --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..2c94d4004dd5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2661,6 +2661,8 @@ #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 #define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 +#define PCI_VENDOR_ID_GLENFLY 0x6766 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 -- cgit v1.2.3 From e5b9032b1b8e0ecc5b71451703f04b4e13ea42f8 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Tue, 9 Jul 2024 12:22:46 +0200 Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8976 NoC Add bindings for Qualcomm MSM8976 Network-On-Chip interconnect devices. Signed-off-by: Adam Skladowski Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240709102728.15349-2-a39.skl@gmail.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8976.h | 97 +++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8976.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8976.h b/include/dt-bindings/interconnect/qcom,msm8976.h new file mode 100644 index 000000000000..4ea90f22320e --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8976.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Qualcomm MSM8976 interconnect IDs + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H + +/* BIMC fabric */ +#define MAS_APPS_PROC 0 +#define MAS_SMMNOC_BIMC 1 +#define MAS_SNOC_BIMC 2 +#define MAS_TCU_0 3 +#define SLV_EBI 4 +#define SLV_BIMC_SNOC 5 + +/* PCNOC fabric */ +#define MAS_USB_HS2 0 +#define MAS_BLSP_1 1 +#define MAS_USB_HS1 2 +#define MAS_BLSP_2 3 +#define MAS_CRYPTO 4 +#define MAS_SDCC_1 5 +#define MAS_SDCC_2 6 +#define MAS_SDCC_3 7 +#define MAS_SNOC_PCNOC 8 +#define MAS_LPASS_AHB 9 +#define MAS_SPDM 10 +#define MAS_DEHR 11 +#define MAS_XM_USB_HS1 12 +#define PCNOC_M_0 13 +#define PCNOC_M_1 14 +#define PCNOC_INT_0 15 +#define PCNOC_INT_1 16 +#define PCNOC_INT_2 17 +#define PCNOC_S_1 18 +#define PCNOC_S_2 19 +#define PCNOC_S_3 20 +#define PCNOC_S_4 21 +#define PCNOC_S_8 22 +#define PCNOC_S_9 23 +#define SLV_TCSR 24 +#define SLV_TLMM 25 +#define SLV_CRYPTO_0_CFG 26 +#define SLV_MESSAGE_RAM 27 +#define SLV_PDM 28 +#define SLV_PRNG 29 +#define SLV_PMIC_ARB 30 +#define SLV_SNOC_CFG 31 +#define SLV_DCC_CFG 32 +#define SLV_CAMERA_SS_CFG 33 +#define SLV_DISP_SS_CFG 34 +#define SLV_VENUS_CFG 35 +#define SLV_SDCC_1 36 +#define SLV_BLSP_1 37 +#define SLV_USB_HS 38 +#define SLV_SDCC_3 39 +#define SLV_SDCC_2 40 +#define SLV_GPU_CFG 41 +#define SLV_USB_HS2 42 +#define SLV_BLSP_2 43 +#define SLV_PCNOC_SNOC 44 + +/* SNOC fabric */ +#define MAS_QDSS_BAM 0 +#define MAS_BIMC_SNOC 1 +#define MAS_PCNOC_SNOC 2 
+#define MAS_QDSS_ETR 3 +#define MAS_LPASS_PROC 4 +#define MAS_IPA 5 +#define QDSS_INT 6 +#define SNOC_INT_0 7 +#define SNOC_INT_1 8 +#define SNOC_INT_2 9 +#define SLV_KPSS_AHB 10 +#define SLV_SNOC_BIMC 11 +#define SLV_IMEM 12 +#define SLV_SNOC_PCNOC 13 +#define SLV_QDSS_STM 14 +#define SLV_CATS_0 15 +#define SLV_CATS_1 16 +#define SLV_LPASS 17 + +/* SNOC-MM fabric */ +#define MAS_JPEG 0 +#define MAS_OXILI 1 +#define MAS_MDP0 2 +#define MAS_MDP1 3 +#define MAS_VENUS_0 4 +#define MAS_VENUS_1 5 +#define MAS_VFE_0 6 +#define MAS_VFE_1 7 +#define MAS_CPP 8 +#define MM_INT_0 9 +#define SLV_SMMNOC_BIMC 10 + +#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8976_H */ -- cgit v1.2.3 From 4937dc0ffc8edc774b21882d557c032f3693f40d Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Tue, 9 Jul 2024 12:22:48 +0200 Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8937 NoC Add bindings for Qualcomm MSM8937 Network-On-Chip interconnect devices. Signed-off-by: Adam Skladowski Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240709102728.15349-4-a39.skl@gmail.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8937.h | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8937.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8937.h b/include/dt-bindings/interconnect/qcom,msm8937.h new file mode 100644 index 000000000000..98b8a4637aab --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8937.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Qualcomm MSM8937 interconnect IDs + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H + +/* BIMC fabric */ +#define MAS_APPS_PROC 0 +#define MAS_OXILI 1 +#define MAS_SNOC_BIMC_0 2 +#define MAS_SNOC_BIMC_2 3 +#define MAS_SNOC_BIMC_1 4 +#define MAS_TCU_0 5 +#define SLV_EBI 6 +#define SLV_BIMC_SNOC 7 + +/* PCNOC fabric */ +#define MAS_SPDM 0 +#define MAS_BLSP_1 1 +#define MAS_BLSP_2 2 +#define MAS_USB_HS1 3 +#define MAS_XI_USB_HS1 4 +#define MAS_CRYPTO 5 +#define MAS_SDCC_1 6 +#define MAS_SDCC_2 7 +#define MAS_SNOC_PCNOC 8 +#define PCNOC_M_0 9 +#define PCNOC_M_1 10 +#define PCNOC_INT_0 11 +#define PCNOC_INT_1 12 +#define PCNOC_INT_2 13 +#define PCNOC_INT_3 14 +#define PCNOC_S_0 15 +#define PCNOC_S_1 16 +#define PCNOC_S_2 17 +#define PCNOC_S_3 18 +#define PCNOC_S_4 19 +#define PCNOC_S_6 20 +#define PCNOC_S_7 21 +#define PCNOC_S_8 22 +#define SLV_SDCC_2 23 +#define SLV_SPDM 24 +#define SLV_PDM 25 +#define SLV_PRNG 26 +#define SLV_TCSR 27 +#define SLV_SNOC_CFG 28 +#define SLV_MESSAGE_RAM 29 +#define SLV_CAMERA_SS_CFG 30 +#define SLV_DISP_SS_CFG 31 +#define SLV_VENUS_CFG 32 +#define SLV_GPU_CFG 33 +#define SLV_TLMM 34 +#define SLV_BLSP_1 35 +#define SLV_BLSP_2 36 +#define SLV_PMIC_ARB 37 +#define SLV_SDCC_1 38 +#define SLV_CRYPTO_0_CFG 39 +#define SLV_USB_HS 40 +#define SLV_TCU 41 +#define SLV_PCNOC_SNOC 42 + +/* SNOC fabric */ +#define MAS_QDSS_BAM 0 +#define MAS_BIMC_SNOC 1 +#define MAS_PCNOC_SNOC 2 +#define MAS_QDSS_ETR 3 +#define QDSS_INT 4 +#define SNOC_INT_0 5 +#define SNOC_INT_1 6 +#define SNOC_INT_2 7 +#define SLV_KPSS_AHB 8 +#define SLV_WCSS 9 +#define SLV_SNOC_BIMC_1 10 +#define SLV_IMEM 11 +#define SLV_SNOC_PCNOC 12 +#define SLV_QDSS_STM 13 +#define SLV_CATS_1 14 +#define SLV_LPASS 15 + +/* SNOC-MM fabric */ +#define MAS_JPEG 0 +#define MAS_MDP 1 +#define MAS_VENUS 2 +#define MAS_VFE0 3 +#define MAS_VFE1 4 +#define 
MAS_CPP 5 +#define SLV_SNOC_BIMC_0 6 +#define SLV_SNOC_BIMC_2 7 +#define SLV_CATS_0 8 + +#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8937_H */ -- cgit v1.2.3 From d59232afb0344e33e9399f308d9b4a03876e7676 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:22 +0000 Subject: bpf: Rename ARG_PTR_TO_KPTR -> ARG_KPTR_XCHG_DEST ARG_PTR_TO_KPTR is currently only used by the bpf_kptr_xchg helper. Although it limits reg types for that helper's first arg to PTR_TO_MAP_VALUE, any arbitrary mapval won't do: further custom verification logic ensures that the mapval reg being xchgd-into is pointing to a kptr field. If this is not the case, it's not safe to xchg into that reg's pointee. Let's rename the bpf_arg_type to more accurately describe the fairly specific expectations that this arg type encodes. This is a nonfunctional change. Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-4-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 00dc4dd28cbd..dc63083f76b7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -744,7 +744,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, -- cgit v1.2.3 From b0966c724584a5a9fd7fb529de19807c31f27a45 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:23 +0000 Subject: bpf: Support bpf_kptr_xchg into local kptr Currently, users can only stash kptr into map values with bpf_kptr_xchg(). This patch further supports stashing kptr into local kptr by adding local kptr as a valid destination type. When stashing into local kptr, btf_record in program BTF is used instead of btf_record in map to search for the btf_field of the local kptr. The local kptr specific checks in check_reg_type() only apply when the source argument of bpf_kptr_xchg() is local kptr. Therefore, we make the scope of the check explicit as the destination now can also be local kptr. Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-5-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e2629457d72d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5519,11 +5519,12 @@ union bpf_attr { * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * - * void *bpf_kptr_xchg(void *map_value, void *ptr) + * void *bpf_kptr_xchg(void *dst, void *ptr) * Description - * Exchange kptr at pointer *map_value* with *ptr*, and return the - * old value. *ptr* can be NULL, otherwise it must be a referenced - * pointer which will be released when this helper is called. + * Exchange kptr at pointer *dst* with *ptr*, and return the old value. + * *dst* can be map value or local kptr. 
*ptr* can be NULL, otherwise + * it must be a referenced pointer which will be released when this helper + * is called. * Return * The old value of kptr (which can be NULL). The returned pointer * if not NULL, is a reference which must be released using its -- cgit v1.2.3 From 9e83dd3ebb14fadccb936308b7b101c75da76324 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Aug 2024 18:39:34 +0800 Subject: irqchip/loongson-eiointc: Rename CPUHP_AP_IRQ_LOONGARCH_STARTING Rename CPUHP_AP_IRQ_LOONGARCH_STARTING to CPUHP_AP_IRQ_EIOINTC_STARTING because the upcoming AVECINTC irqchip driver will introduce a new state and so both are clearly identifiable. Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240823103936.25092-3-zhangtianyang@loongson.cn --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..e49807f7805c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -145,7 +145,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_IRQ_LOONGARCH_STARTING, + CPUHP_AP_IRQ_EIOINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From ae16f05c928a1336d5d9d19fd805d7bf29c3f0c8 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Fri, 23 Aug 2024 18:43:37 +0800 Subject: irqchip/loongarch-avec: Add AVEC irqchip support Introduce the advanced extended interrupt controllers (AVECINTC). This feature will allow each core to have 256 independent interrupt vectors and MSI interrupts can be independently routed to any vector on any CPU. The whole topology of irqchips in LoongArch machines looks like this if AVECINTC is supported: +-----+ +-----------------------+ +-------+ | IPI | --> | CPUINTC | <-- | Timer | +-----+ +-----------------------+ +-------+ ^ ^ ^ | | | +---------+ +----------+ +---------+ +-------+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | +---------+ +----------+ +---------+ +-------+ ^ ^ | | +---------+ +---------+ | PCH-PIC | | PCH-MSI | +---------+ +---------+ ^ ^ ^ | | | +---------+ +---------+ +---------+ | Devices | | PCH-LPC | | Devices | +---------+ +---------+ +---------+ ^ | +---------+ | Devices | +---------+ Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Co-developed-by: Liupu Wang Signed-off-by: Liupu Wang Co-developed-by: Huacai Chen Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240823104337.25577-2-zhangtianyang@loongson.cn --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e49807f7805c..55a726d317d4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -146,6 +146,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_EIOINTC_STARTING, + CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- cgit v1.2.3 From 17e28a9aeae40d2de3c1ea3b94819ed94bfd6392 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Thu, 22 Aug 2024 15:31:58 +0300 Subject: genirq: Fix typo in struct comment Remove redundant "e" in "assign(e)ments". 
Signed-off-by: Costa Shulyupin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240822123205.2186221-1-costa.shul@redhat.com --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 694de61e0b38..457151f9f263 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -276,7 +276,7 @@ struct irq_affinity_notify { #define IRQ_AFFINITY_MAX_SETS 4 /** - * struct irq_affinity - Description for automatic irq affinity assignements + * struct irq_affinity - Description for automatic irq affinity assignments * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of -- cgit v1.2.3 From b8ca67b1143e4d1e2adc20790c2c46e1498f54be Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 4 Aug 2024 08:40:10 +0300 Subject: dt-bindings: interconnect: qcom,sm8350: drop DISP nodes Vendor msm-5.x kernels declared duplicate indices for some of display nodes to be used by separate display RSC and BCM voters. As it is not clear how this separate BCM should be modelled upstream and the device trees do not use these indices, drop them for now. Signed-off-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240804-sm8350-fixes-v1-6-1149dd8399fe@linaro.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,sm8350.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,sm8350.h b/include/dt-bindings/interconnect/qcom,sm8350.h index c7f7ed315aeb..2282f93607bc 100644 --- a/include/dt-bindings/interconnect/qcom,sm8350.h +++ b/include/dt-bindings/interconnect/qcom,sm8350.h @@ -119,9 +119,6 @@ #define SLAVE_SERVICE_GEM_NOC_1 16 #define SLAVE_SERVICE_GEM_NOC_2 17 #define SLAVE_SERVICE_GEM_NOC 18 -#define MASTER_MNOC_HF_MEM_NOC_DISP 19 -#define MASTER_MNOC_SF_MEM_NOC_DISP 20 -#define SLAVE_LLCC_DISP 21 #define MASTER_CNOC_LPASS_AG_NOC 0 #define SLAVE_LPASS_CORE_CFG 1 @@ -133,8 +130,6 @@ #define MASTER_LLCC 0 #define SLAVE_EBI1 1 -#define MASTER_LLCC_DISP 2 -#define SLAVE_EBI1_DISP 3 #define MASTER_CAMNOC_HF 0 #define MASTER_CAMNOC_ICP 1 @@ -149,11 +144,6 @@ #define SLAVE_MNOC_HF_MEM_NOC 10 #define SLAVE_MNOC_SF_MEM_NOC 11 #define SLAVE_SERVICE_MNOC 12 -#define MASTER_MDP0_DISP 13 -#define MASTER_MDP1_DISP 14 -#define MASTER_ROTATOR_DISP 15 -#define SLAVE_MNOC_HF_MEM_NOC_DISP 16 -#define SLAVE_MNOC_SF_MEM_NOC_DISP 17 #define MASTER_CDSP_NOC_CFG 0 #define MASTER_CDSP_PROC 1 -- cgit v1.2.3 From 65ab5ac4df012388481d0414fcac1d5ac1721fb3 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Fri, 23 Aug 2024 12:51:00 -0700 Subject: bpf: Add bpf_copy_from_user_str kfunc This adds a kfunc wrapper around strncpy_from_user, which can be called from sleepable BPF programs. This matches the non-sleepable 'bpf_probe_read_user_str' helper except it includes an additional 'flags' param, which allows consumers to clear the entire destination buffer on success or failure. 
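As a rough usage sketch (not part of this patch), a sleepable program could call the new kfunc as below. The kfunc prototype, the section name and the attach target are assumptions made for illustration only:

  /* Sketch only: prototype and attach target are assumed. */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  extern int bpf_copy_from_user_str(void *dst, u32 dst__sz,
                                    const void *unsafe_ptr__ign,
                                    u64 flags) __ksym;

  char LICENSE[] SEC("license") = "GPL";

  /* Hypothetical sleepable uprobe on a user function whose first
   * argument is a user-space string pointer. */
  SEC("uprobe.s//usr/bin/app:handle_name")
  int BPF_UPROBE(trace_handle_name, const char *user_str)
  {
          char buf[64];
          long n;

          /* With BPF_F_PAD_ZEROS the remainder of 'buf' is zeroed, so
           * the whole buffer is defined on success and on failure. */
          n = bpf_copy_from_user_str(buf, sizeof(buf), user_str,
                                     BPF_F_PAD_ZEROS);
          if (n > 0)
                  bpf_printk("copied %ld bytes: %s", n, buf);
          return 0;
  }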
Signed-off-by: Jordan Rome Link: https://lore.kernel.org/r/20240823195101.3621028-1-linux@jordanrome.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2629457d72d..c3a5728db115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7513,4 +7513,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) + */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From a54ad727f74559f7c3dfcfd2a63d0ce7683a82e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Aug 2024 15:04:32 +0200 Subject: xfrm: policy: remove remaining use of inexact list No consumers anymore, remove it. After this, insertion of policies no longer require list walk of all inexact policies but only those that are reachable via the candidate sets. This gives almost linear insertion speeds provided the inserted policies are for non-overlapping networks. Before: Inserted 1000 policies in 70 ms Inserted 10000 policies in 1155 ms Inserted 100000 policies in 216848 ms After: Inserted 1000 policies in 56 ms Inserted 10000 policies in 478 ms Inserted 100000 policies in 4580 ms Insertion of 1m entries takes about ~40s after this change on my test vm. Cc: Noel Kuntze Cc: Tobias Brunner Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f7244ac4fa08..1fa2da22a49e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -555,7 +555,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; -- cgit v1.2.3 From 3c44d31cb34ce4eb8311a2e73634d57702948230 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Aug 2024 14:58:35 +0800 Subject: crypto: simd - Do not call crypto_alloc_tfm during registration Algorithm registration is usually carried out during module init, where as little work as possible should be carried out. The SIMD code violated this rule by allocating a tfm, this then triggers a full test of the algorithm which may dead-lock in certain cases. SIMD is only allocating the tfm to get at the alg object, which is in fact already available as it is what we are registering. Use that directly and remove the crypto_alloc_tfm call. Also remove some obsolete and unused SIMD API. 
Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index d2316242a988..be97b97a75dd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -14,11 +14,10 @@ struct simd_skcipher_alg; struct skcipher_alg; -struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, +struct simd_skcipher_alg *simd_skcipher_create_compat(struct skcipher_alg *ialg, + const char *algname, const char *drvname, const char *basename); -struct simd_skcipher_alg *simd_skcipher_create(const char *algname, - const char *basename); void simd_skcipher_free(struct simd_skcipher_alg *alg); int simd_register_skciphers_compat(struct skcipher_alg *algs, int count, @@ -32,13 +31,6 @@ void simd_unregister_skciphers(struct skcipher_alg *algs, int count, struct simd_aead_alg; struct aead_alg; -struct simd_aead_alg *simd_aead_create_compat(const char *algname, - const char *drvname, - const char *basename); -struct simd_aead_alg *simd_aead_create(const char *algname, - const char *basename); -void simd_aead_free(struct simd_aead_alg *alg); - int simd_register_aeads_compat(struct aead_alg *algs, int count, struct simd_aead_alg **simd_algs); -- cgit v1.2.3 From 7d3aed652d090508990d245f9d80dcc481910d02 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 22 Aug 2024 05:51:54 +0000 Subject: net: refactor ->ndo_bpf calls into dev_xdp_propagate When net devices propagate xdp configurations to slave devices, we will need to perform a memory provider check to ensure we're not binding xdp to a device using unreadable netmem. Currently the ->ndo_bpf calls in a few places. Adding checks to all these places would not be ideal. Refactor all the ->ndo_bpf calls into one place where we can add this check in the future. Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289a6133769c..7dfa836d9dbf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3925,6 +3925,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From d29cb3726f03cdac7889f0109a7cb84f79e168a8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Aug 2024 15:18:13 +0100 Subject: io_uring: add absolute mode wait timeouts In addition to current relative timeouts for the waiting loop, where the timespec argument specifies the maximum time it can wait for, add support for the absolute mode, with the value carrying a CLOCK_MONOTONIC absolute time until which we should return control back to the user. 
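A minimal userspace sketch of the intended use (raw syscall, error handling omitted; the extended-argument layout is the pre-existing one, only the new IORING_ENTER_ABS_TIMER flag comes from this patch):

  #include <linux/io_uring.h>
  #include <linux/time_types.h>
  #include <signal.h>
  #include <stdint.h>
  #include <sys/syscall.h>
  #include <time.h>
  #include <unistd.h>

  /* Wait for 'want' completions, giving up at the absolute
   * CLOCK_MONOTONIC time 'deadline'. */
  static int wait_until(int ring_fd, unsigned int want,
                        const struct __kernel_timespec *deadline)
  {
          struct io_uring_getevents_arg arg = {
                  .sigmask_sz = _NSIG / 8,
                  .ts = (uint64_t)(uintptr_t)deadline,
          };

          return syscall(__NR_io_uring_enter, ring_fd, 0, want,
                         IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG |
                         IORING_ENTER_ABS_TIMER,
                         &arg, sizeof(arg));
  }

  /* Example: deadline 100ms from now. */
  static int wait_100ms(int ring_fd, unsigned int want)
  {
          struct timespec now;
          struct __kernel_timespec deadline;

          clock_gettime(CLOCK_MONOTONIC, &now);
          deadline.tv_sec = now.tv_sec;
          deadline.tv_nsec = now.tv_nsec + 100 * 1000 * 1000;
          if (deadline.tv_nsec >= 1000000000) {
                  deadline.tv_sec++;
                  deadline.tv_nsec -= 1000000000;
          }
          return wait_until(ring_fd, want, &deadline);
  }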
Suggested-by: Lewis Baker Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4d5b74d67ada882590b2e42aa3aa7117bbf6b55f.1723039801.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index adc2524fd8e3..6a81f55fcd0d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -507,6 +507,7 @@ struct io_cqring_offsets { #define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) +#define IORING_ENTER_ABS_TIMER (1U << 5) /* * Passed in for io_uring_setup(2). Copied back with updated info on success -- cgit v1.2.3 From 2b8e976b984278edbeab3251d370e76d237699f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Aug 2024 15:18:14 +0100 Subject: io_uring: user registered clockid for wait timeouts Add a new registration opcode IORING_REGISTER_CLOCK, which allows the user to select which clock id it wants to use with CQ waiting timeouts. It only allows a subset of all posix clocks and currently supports CLOCK_MONOTONIC and CLOCK_BOOTTIME. Suggested-by: Lewis Baker Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ include/uapi/linux/io_uring.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3315005df117..4b9ba523978d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -239,6 +239,9 @@ struct io_ring_ctx { struct io_rings *rings; struct percpu_ref refs; + clockid_t clockid; + enum tk_offsets clock_offset; + enum task_work_notify_mode notify_method; unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6a81f55fcd0d..7af716136df9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -596,6 +596,8 @@ enum io_uring_register_op { IORING_REGISTER_NAPI = 27, IORING_UNREGISTER_NAPI = 28, + IORING_REGISTER_CLOCK = 29, + /* this goes last */ IORING_REGISTER_LAST, @@ -676,6 +678,11 @@ struct io_uring_restriction { __u32 resv2[3]; }; +struct io_uring_clock_register { + __u32 clockid; + __u32 __resv[3]; +}; + struct io_uring_buf { __u64 addr; __u32 len; -- cgit v1.2.3 From 7ed9e09e2d13d5d43385153bba4734cb0eafd7fd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Jan 2024 10:46:30 -0700 Subject: io_uring: wire up min batch wake timeout Expose min_wait_usec in io_uring_getevents_arg, replacing the pad member that is currently in there. The value is in usecs, which is explained in the name as well. Note that if min_wait_usec and a normal timeout is used in conjunction, the normal timeout is still relative to the base time. For example, if min_wait_usec is set to 100 and the normal timeout is 1000, the max total time waited is still 1000. This also means that if the normal timeout is shorter than min_wait_usec, then only the min_wait_usec will take effect. See previous commit for an explanation of how this works. IORING_FEAT_MIN_TIMEOUT is added as a feature flag for this, as applications doing submit_and_wait_timeout() style operations will generally not see the -EINVAL from the wait side as they return the number of IOs submitted. 
Only if no IOs are submitted will the -EINVAL bubble back up to the application. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 7af716136df9..042eab793e26 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -543,6 +543,7 @@ struct io_uring_params { #define IORING_FEAT_LINKED_FILE (1U << 12) #define IORING_FEAT_REG_REG_RING (1U << 13) #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) +#define IORING_FEAT_MIN_TIMEOUT (1U << 15) /* * io_uring_register(2) opcodes and arguments @@ -766,7 +767,7 @@ enum io_uring_register_restriction_op { struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; - __u32 pad; + __u32 min_wait_usec; __u64 ts; }; -- cgit v1.2.3 From 6729c73103bd7a0e60b0c980b51b5434010b4502 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 23 Aug 2024 17:11:09 +0300 Subject: drm/ttm: fix kernel-doc typo for @trylock_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/tryock_only/trylock_only/ Fixes: da966b82bf3d ("drm/ttm: Provide a generic LRU walker helper") Cc: Thomas Hellström Cc: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823141110.3431423-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/ttm/ttm_bo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index d1a732d56259..7294dde240fb 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -222,7 +222,7 @@ struct ttm_lru_walk { struct ttm_operation_ctx *ctx; /** @ticket: The struct ww_acquire_ctx if any. */ struct ww_acquire_ctx *ticket; - /** @tryock_only: Only use trylock for locking. */ + /** @trylock_only: Only use trylock for locking. */ bool trylock_only; }; -- cgit v1.2.3 From 64aa494de6fa7289ba9d1de4ba331a1aa8ae094f Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 2 Aug 2024 08:35:16 +0800 Subject: dt-bindings: pinctrl: Add pinctrl for Sophgo CV1800 series SoC. Add pinctrl support for Sophgo CV1800 series SoC. 
Signed-off-by: Inochi Amaoto Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/IA1PR20MB4953680DE7977CAD906DBDB4BBB32@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/pinctrl-cv1800b.h | 63 +++++++++++++ include/dt-bindings/pinctrl/pinctrl-cv1812h.h | 127 ++++++++++++++++++++++++++ include/dt-bindings/pinctrl/pinctrl-cv18xx.h | 19 ++++ include/dt-bindings/pinctrl/pinctrl-sg2000.h | 127 ++++++++++++++++++++++++++ include/dt-bindings/pinctrl/pinctrl-sg2002.h | 79 ++++++++++++++++ 5 files changed, 415 insertions(+) create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv1800b.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv1812h.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-cv18xx.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-sg2000.h create mode 100644 include/dt-bindings/pinctrl/pinctrl-sg2002.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/pinctrl-cv1800b.h b/include/dt-bindings/pinctrl/pinctrl-cv1800b.h new file mode 100644 index 000000000000..0593fc33d470 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv1800b.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. + */ + +#ifndef _DT_BINDINGS_PINCTRL_CV1800B_H +#define _DT_BINDINGS_PINCTRL_CV1800B_H + +#include + +#define PIN_AUD_AOUTR 1 +#define PIN_SD0_CLK 3 +#define PIN_SD0_CMD 4 +#define PIN_SD0_D0 5 +#define PIN_SD0_D1 7 +#define PIN_SD0_D2 8 +#define PIN_SD0_D3 9 +#define PIN_SD0_CD 11 +#define PIN_SD0_PWR_EN 12 +#define PIN_SPK_EN 14 +#define PIN_UART0_TX 15 +#define PIN_UART0_RX 16 +#define PIN_SPINOR_HOLD_X 17 +#define PIN_SPINOR_SCK 18 +#define PIN_SPINOR_MOSI 19 +#define PIN_SPINOR_WP_X 20 +#define PIN_SPINOR_MISO 21 +#define PIN_SPINOR_CS_X 22 +#define PIN_IIC0_SCL 23 +#define PIN_IIC0_SDA 24 +#define PIN_AUX0 25 +#define PIN_PWR_VBAT_DET 30 +#define PIN_PWR_SEQ2 31 +#define PIN_XTAL_XIN 33 +#define PIN_SD1_GPIO0 35 +#define PIN_SD1_GPIO1 36 +#define PIN_SD1_D3 38 +#define PIN_SD1_D2 39 +#define PIN_SD1_D1 40 +#define PIN_SD1_D0 41 +#define PIN_SD1_CMD 42 +#define PIN_SD1_CLK 43 +#define PIN_ADC1 44 +#define PIN_USB_VBUS_DET 45 +#define PIN_ETH_TXP 47 +#define PIN_ETH_TXM 48 +#define PIN_ETH_RXP 49 +#define PIN_ETH_RXM 50 +#define PIN_MIPIRX4N 56 +#define PIN_MIPIRX4P 57 +#define PIN_MIPIRX3N 58 +#define PIN_MIPIRX3P 59 +#define PIN_MIPIRX2N 60 +#define PIN_MIPIRX2P 61 +#define PIN_MIPIRX1N 62 +#define PIN_MIPIRX1P 63 +#define PIN_MIPIRX0N 64 +#define PIN_MIPIRX0P 65 +#define PIN_AUD_AINL_MIC 67 + +#endif /* _DT_BINDINGS_PINCTRL_CV1800B_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-cv1812h.h b/include/dt-bindings/pinctrl/pinctrl-cv1812h.h new file mode 100644 index 000000000000..2908de347919 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv1812h.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. 
+ */ + +#ifndef _DT_BINDINGS_PINCTRL_CV1812H_H +#define _DT_BINDINGS_PINCTRL_CV1812H_H + +#include + +#define PINPOS(row, col) \ + ((((row) - 'A' + 1) << 8) + ((col) - 1)) + +#define PIN_MIPI_TXM4 PINPOS('A', 2) +#define PIN_MIPIRX0N PINPOS('A', 4) +#define PIN_MIPIRX3P PINPOS('A', 6) +#define PIN_MIPIRX4P PINPOS('A', 7) +#define PIN_VIVO_D2 PINPOS('A', 9) +#define PIN_VIVO_D3 PINPOS('A', 10) +#define PIN_VIVO_D10 PINPOS('A', 12) +#define PIN_USB_VBUS_DET PINPOS('A', 13) +#define PIN_MIPI_TXP3 PINPOS('B', 1) +#define PIN_MIPI_TXM3 PINPOS('B', 2) +#define PIN_MIPI_TXP4 PINPOS('B', 3) +#define PIN_MIPIRX0P PINPOS('B', 4) +#define PIN_MIPIRX1N PINPOS('B', 5) +#define PIN_MIPIRX2N PINPOS('B', 6) +#define PIN_MIPIRX4N PINPOS('B', 7) +#define PIN_MIPIRX5N PINPOS('B', 8) +#define PIN_VIVO_D1 PINPOS('B', 9) +#define PIN_VIVO_D5 PINPOS('B', 10) +#define PIN_VIVO_D7 PINPOS('B', 11) +#define PIN_VIVO_D9 PINPOS('B', 12) +#define PIN_USB_ID PINPOS('B', 13) +#define PIN_ETH_RXM PINPOS('B', 15) +#define PIN_MIPI_TXP2 PINPOS('C', 1) +#define PIN_MIPI_TXM2 PINPOS('C', 2) +#define PIN_CAM_PD0 PINPOS('C', 3) +#define PIN_CAM_MCLK0 PINPOS('C', 4) +#define PIN_MIPIRX1P PINPOS('C', 5) +#define PIN_MIPIRX2P PINPOS('C', 6) +#define PIN_MIPIRX3N PINPOS('C', 7) +#define PIN_MIPIRX5P PINPOS('C', 8) +#define PIN_VIVO_CLK PINPOS('C', 9) +#define PIN_VIVO_D6 PINPOS('C', 10) +#define PIN_VIVO_D8 PINPOS('C', 11) +#define PIN_USB_VBUS_EN PINPOS('C', 12) +#define PIN_ETH_RXP PINPOS('C', 14) +#define PIN_GPIO_RTX PINPOS('C', 15) +#define PIN_MIPI_TXP1 PINPOS('D', 1) +#define PIN_MIPI_TXM1 PINPOS('D', 2) +#define PIN_CAM_MCLK1 PINPOS('D', 3) +#define PIN_IIC3_SCL PINPOS('D', 4) +#define PIN_VIVO_D4 PINPOS('D', 10) +#define PIN_ETH_TXM PINPOS('D', 14) +#define PIN_ETH_TXP PINPOS('D', 15) +#define PIN_MIPI_TXP0 PINPOS('E', 1) +#define PIN_MIPI_TXM0 PINPOS('E', 2) +#define PIN_CAM_PD1 PINPOS('E', 4) +#define PIN_CAM_RST0 PINPOS('E', 5) +#define PIN_VIVO_D0 PINPOS('E', 10) +#define PIN_ADC1 PINPOS('E', 13) +#define PIN_ADC2 PINPOS('E', 14) +#define PIN_ADC3 PINPOS('E', 15) +#define PIN_AUD_AOUTL PINPOS('F', 2) +#define PIN_IIC3_SDA PINPOS('F', 4) +#define PIN_SD1_D2 PINPOS('F', 14) +#define PIN_AUD_AOUTR PINPOS('G', 2) +#define PIN_SD1_D3 PINPOS('G', 13) +#define PIN_SD1_CLK PINPOS('G', 14) +#define PIN_SD1_CMD PINPOS('G', 15) +#define PIN_AUD_AINL_MIC PINPOS('H', 1) +#define PIN_RSTN PINPOS('H', 12) +#define PIN_PWM0_BUCK PINPOS('H', 13) +#define PIN_SD1_D1 PINPOS('H', 14) +#define PIN_SD1_D0 PINPOS('H', 15) +#define PIN_AUD_AINR_MIC PINPOS('J', 1) +#define PIN_IIC2_SCL PINPOS('J', 13) +#define PIN_IIC2_SDA PINPOS('J', 14) +#define PIN_SD0_CD PINPOS('K', 2) +#define PIN_SD0_D1 PINPOS('K', 3) +#define PIN_UART2_RX PINPOS('K', 13) +#define PIN_UART2_CTS PINPOS('K', 14) +#define PIN_UART2_TX PINPOS('K', 15) +#define PIN_SD0_CLK PINPOS('L', 1) +#define PIN_SD0_D0 PINPOS('L', 2) +#define PIN_SD0_CMD PINPOS('L', 3) +#define PIN_CLK32K PINPOS('L', 14) +#define PIN_UART2_RTS PINPOS('L', 15) +#define PIN_SD0_D3 PINPOS('M', 1) +#define PIN_SD0_D2 PINPOS('M', 2) +#define PIN_UART0_RX PINPOS('M', 4) +#define PIN_UART0_TX PINPOS('M', 5) +#define PIN_JTAG_CPU_TRST PINPOS('M', 6) +#define PIN_PWR_ON PINPOS('M', 11) +#define PIN_PWR_GPIO2 PINPOS('M', 12) +#define PIN_PWR_GPIO0 PINPOS('M', 13) +#define PIN_CLK25M PINPOS('M', 14) +#define PIN_SD0_PWR_EN PINPOS('N', 1) +#define PIN_SPK_EN PINPOS('N', 3) +#define PIN_JTAG_CPU_TCK PINPOS('N', 4) +#define PIN_JTAG_CPU_TMS PINPOS('N', 6) +#define PIN_PWR_WAKEUP1 PINPOS('N', 11) +#define 
PIN_PWR_WAKEUP0 PINPOS('N', 12) +#define PIN_PWR_GPIO1 PINPOS('N', 13) +#define PIN_EMMC_DAT3 PINPOS('P', 1) +#define PIN_EMMC_DAT0 PINPOS('P', 2) +#define PIN_EMMC_DAT2 PINPOS('P', 3) +#define PIN_EMMC_RSTN PINPOS('P', 4) +#define PIN_AUX0 PINPOS('P', 5) +#define PIN_IIC0_SDA PINPOS('P', 6) +#define PIN_PWR_SEQ3 PINPOS('P', 10) +#define PIN_PWR_VBAT_DET PINPOS('P', 11) +#define PIN_PWR_SEQ1 PINPOS('P', 12) +#define PIN_PWR_BUTTON1 PINPOS('P', 13) +#define PIN_EMMC_DAT1 PINPOS('R', 2) +#define PIN_EMMC_CMD PINPOS('R', 3) +#define PIN_EMMC_CLK PINPOS('R', 4) +#define PIN_IIC0_SCL PINPOS('R', 6) +#define PIN_GPIO_ZQ PINPOS('R', 10) +#define PIN_PWR_RSTN PINPOS('R', 11) +#define PIN_PWR_SEQ2 PINPOS('R', 12) +#define PIN_XTAL_XIN PINPOS('R', 13) + +#endif /* _DT_BINDINGS_PINCTRL_CV1812H_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-cv18xx.h b/include/dt-bindings/pinctrl/pinctrl-cv18xx.h new file mode 100644 index 000000000000..bc92ad1067ec --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-cv18xx.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Ltd. + * + * Author: Inochi Amaoto + */ + +#ifndef _DT_BINDINGS_PINCTRL_CV18XX_H +#define _DT_BINDINGS_PINCTRL_CV18XX_H + +#define PIN_MUX_INVALD 0xff + +#define PINMUX2(pin, mux, mux2) \ + (((pin) & 0xffff) | (((mux) & 0xff) << 16) | (((mux2) & 0xff) << 24)) + +#define PINMUX(pin, mux) \ + PINMUX2(pin, mux, PIN_MUX_INVALD) + +#endif /* _DT_BINDINGS_PINCTRL_CV18XX_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-sg2000.h b/include/dt-bindings/pinctrl/pinctrl-sg2000.h new file mode 100644 index 000000000000..4871f9a7c6c1 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-sg2000.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. 
+ */ + +#ifndef _DT_BINDINGS_PINCTRL_SG2000_H +#define _DT_BINDINGS_PINCTRL_SG2000_H + +#include + +#define PINPOS(row, col) \ + ((((row) - 'A' + 1) << 8) + ((col) - 1)) + +#define PIN_MIPI_TXM4 PINPOS('A', 2) +#define PIN_MIPIRX0N PINPOS('A', 4) +#define PIN_MIPIRX3P PINPOS('A', 6) +#define PIN_MIPIRX4P PINPOS('A', 7) +#define PIN_VIVO_D2 PINPOS('A', 9) +#define PIN_VIVO_D3 PINPOS('A', 10) +#define PIN_VIVO_D10 PINPOS('A', 12) +#define PIN_USB_VBUS_DET PINPOS('A', 13) +#define PIN_MIPI_TXP3 PINPOS('B', 1) +#define PIN_MIPI_TXM3 PINPOS('B', 2) +#define PIN_MIPI_TXP4 PINPOS('B', 3) +#define PIN_MIPIRX0P PINPOS('B', 4) +#define PIN_MIPIRX1N PINPOS('B', 5) +#define PIN_MIPIRX2N PINPOS('B', 6) +#define PIN_MIPIRX4N PINPOS('B', 7) +#define PIN_MIPIRX5N PINPOS('B', 8) +#define PIN_VIVO_D1 PINPOS('B', 9) +#define PIN_VIVO_D5 PINPOS('B', 10) +#define PIN_VIVO_D7 PINPOS('B', 11) +#define PIN_VIVO_D9 PINPOS('B', 12) +#define PIN_USB_ID PINPOS('B', 13) +#define PIN_ETH_RXM PINPOS('B', 15) +#define PIN_MIPI_TXP2 PINPOS('C', 1) +#define PIN_MIPI_TXM2 PINPOS('C', 2) +#define PIN_CAM_PD0 PINPOS('C', 3) +#define PIN_CAM_MCLK0 PINPOS('C', 4) +#define PIN_MIPIRX1P PINPOS('C', 5) +#define PIN_MIPIRX2P PINPOS('C', 6) +#define PIN_MIPIRX3N PINPOS('C', 7) +#define PIN_MIPIRX5P PINPOS('C', 8) +#define PIN_VIVO_CLK PINPOS('C', 9) +#define PIN_VIVO_D6 PINPOS('C', 10) +#define PIN_VIVO_D8 PINPOS('C', 11) +#define PIN_USB_VBUS_EN PINPOS('C', 12) +#define PIN_ETH_RXP PINPOS('C', 14) +#define PIN_GPIO_RTX PINPOS('C', 15) +#define PIN_MIPI_TXP1 PINPOS('D', 1) +#define PIN_MIPI_TXM1 PINPOS('D', 2) +#define PIN_CAM_MCLK1 PINPOS('D', 3) +#define PIN_IIC3_SCL PINPOS('D', 4) +#define PIN_VIVO_D4 PINPOS('D', 10) +#define PIN_ETH_TXM PINPOS('D', 14) +#define PIN_ETH_TXP PINPOS('D', 15) +#define PIN_MIPI_TXP0 PINPOS('E', 1) +#define PIN_MIPI_TXM0 PINPOS('E', 2) +#define PIN_CAM_PD1 PINPOS('E', 4) +#define PIN_CAM_RST0 PINPOS('E', 5) +#define PIN_VIVO_D0 PINPOS('E', 10) +#define PIN_ADC1 PINPOS('E', 13) +#define PIN_ADC2 PINPOS('E', 14) +#define PIN_ADC3 PINPOS('E', 15) +#define PIN_AUD_AOUTL PINPOS('F', 2) +#define PIN_IIC3_SDA PINPOS('F', 4) +#define PIN_SD1_D2 PINPOS('F', 14) +#define PIN_AUD_AOUTR PINPOS('G', 2) +#define PIN_SD1_D3 PINPOS('G', 13) +#define PIN_SD1_CLK PINPOS('G', 14) +#define PIN_SD1_CMD PINPOS('G', 15) +#define PIN_AUD_AINL_MIC PINPOS('H', 1) +#define PIN_RSTN PINPOS('H', 12) +#define PIN_PWM0_BUCK PINPOS('H', 13) +#define PIN_SD1_D1 PINPOS('H', 14) +#define PIN_SD1_D0 PINPOS('H', 15) +#define PIN_AUD_AINR_MIC PINPOS('J', 1) +#define PIN_IIC2_SCL PINPOS('J', 13) +#define PIN_IIC2_SDA PINPOS('J', 14) +#define PIN_SD0_CD PINPOS('K', 2) +#define PIN_SD0_D1 PINPOS('K', 3) +#define PIN_UART2_RX PINPOS('K', 13) +#define PIN_UART2_CTS PINPOS('K', 14) +#define PIN_UART2_TX PINPOS('K', 15) +#define PIN_SD0_CLK PINPOS('L', 1) +#define PIN_SD0_D0 PINPOS('L', 2) +#define PIN_SD0_CMD PINPOS('L', 3) +#define PIN_CLK32K PINPOS('L', 14) +#define PIN_UART2_RTS PINPOS('L', 15) +#define PIN_SD0_D3 PINPOS('M', 1) +#define PIN_SD0_D2 PINPOS('M', 2) +#define PIN_UART0_RX PINPOS('M', 4) +#define PIN_UART0_TX PINPOS('M', 5) +#define PIN_JTAG_CPU_TRST PINPOS('M', 6) +#define PIN_PWR_ON PINPOS('M', 11) +#define PIN_PWR_GPIO2 PINPOS('M', 12) +#define PIN_PWR_GPIO0 PINPOS('M', 13) +#define PIN_CLK25M PINPOS('M', 14) +#define PIN_SD0_PWR_EN PINPOS('N', 1) +#define PIN_SPK_EN PINPOS('N', 3) +#define PIN_JTAG_CPU_TCK PINPOS('N', 4) +#define PIN_JTAG_CPU_TMS PINPOS('N', 6) +#define PIN_PWR_WAKEUP1 PINPOS('N', 11) +#define PIN_PWR_WAKEUP0 
PINPOS('N', 12) +#define PIN_PWR_GPIO1 PINPOS('N', 13) +#define PIN_EMMC_DAT3 PINPOS('P', 1) +#define PIN_EMMC_DAT0 PINPOS('P', 2) +#define PIN_EMMC_DAT2 PINPOS('P', 3) +#define PIN_EMMC_RSTN PINPOS('P', 4) +#define PIN_AUX0 PINPOS('P', 5) +#define PIN_IIC0_SDA PINPOS('P', 6) +#define PIN_PWR_SEQ3 PINPOS('P', 10) +#define PIN_PWR_VBAT_DET PINPOS('P', 11) +#define PIN_PWR_SEQ1 PINPOS('P', 12) +#define PIN_PWR_BUTTON1 PINPOS('P', 13) +#define PIN_EMMC_DAT1 PINPOS('R', 2) +#define PIN_EMMC_CMD PINPOS('R', 3) +#define PIN_EMMC_CLK PINPOS('R', 4) +#define PIN_IIC0_SCL PINPOS('R', 6) +#define PIN_GPIO_ZQ PINPOS('R', 10) +#define PIN_PWR_RSTN PINPOS('R', 11) +#define PIN_PWR_SEQ2 PINPOS('R', 12) +#define PIN_XTAL_XIN PINPOS('R', 13) + +#endif /* _DT_BINDINGS_PINCTRL_SG2000_H */ diff --git a/include/dt-bindings/pinctrl/pinctrl-sg2002.h b/include/dt-bindings/pinctrl/pinctrl-sg2002.h new file mode 100644 index 000000000000..3c36cfa0a550 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-sg2002.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + * + * This file is generated from vendor pinout definition. + */ + +#ifndef _DT_BINDINGS_PINCTRL_SG2002_H +#define _DT_BINDINGS_PINCTRL_SG2002_H + +#include + +#define PIN_AUD_AINL_MIC 2 +#define PIN_AUD_AOUTR 4 +#define PIN_SD0_CLK 6 +#define PIN_SD0_CMD 7 +#define PIN_SD0_D0 8 +#define PIN_SD0_D1 10 +#define PIN_SD0_D2 11 +#define PIN_SD0_D3 12 +#define PIN_SD0_CD 14 +#define PIN_SD0_PWR_EN 15 +#define PIN_SPK_EN 17 +#define PIN_UART0_TX 18 +#define PIN_UART0_RX 19 +#define PIN_EMMC_DAT2 20 +#define PIN_EMMC_CLK 21 +#define PIN_EMMC_DAT0 22 +#define PIN_EMMC_DAT3 23 +#define PIN_EMMC_CMD 24 +#define PIN_EMMC_DAT1 25 +#define PIN_JTAG_CPU_TMS 26 +#define PIN_JTAG_CPU_TCK 27 +#define PIN_IIC0_SCL 28 +#define PIN_IIC0_SDA 29 +#define PIN_AUX0 30 +#define PIN_GPIO_ZQ 35 +#define PIN_PWR_VBAT_DET 38 +#define PIN_PWR_RSTN 39 +#define PIN_PWR_SEQ1 40 +#define PIN_PWR_SEQ2 41 +#define PIN_PWR_WAKEUP0 43 +#define PIN_PWR_BUTTON1 44 +#define PIN_XTAL_XIN 45 +#define PIN_PWR_GPIO0 47 +#define PIN_PWR_GPIO1 48 +#define PIN_PWR_GPIO2 49 +#define PIN_SD1_D3 51 +#define PIN_SD1_D2 52 +#define PIN_SD1_D1 53 +#define PIN_SD1_D0 54 +#define PIN_SD1_CMD 55 +#define PIN_SD1_CLK 56 +#define PIN_PWM0_BUCK 58 +#define PIN_ADC1 59 +#define PIN_USB_VBUS_DET 60 +#define PIN_ETH_TXP 62 +#define PIN_ETH_TXM 63 +#define PIN_ETH_RXP 64 +#define PIN_ETH_RXM 65 +#define PIN_GPIO_RTX 67 +#define PIN_MIPIRX4N 72 +#define PIN_MIPIRX4P 73 +#define PIN_MIPIRX3N 74 +#define PIN_MIPIRX3P 75 +#define PIN_MIPIRX2N 76 +#define PIN_MIPIRX2P 77 +#define PIN_MIPIRX1N 78 +#define PIN_MIPIRX1P 79 +#define PIN_MIPIRX0N 80 +#define PIN_MIPIRX0P 81 +#define PIN_MIPI_TXM2 83 +#define PIN_MIPI_TXP2 84 +#define PIN_MIPI_TXM1 85 +#define PIN_MIPI_TXP1 86 +#define PIN_MIPI_TXM0 87 +#define PIN_MIPI_TXP0 88 + +#endif /* _DT_BINDINGS_PINCTRL_SG2002_H */ -- cgit v1.2.3 From 5fd0628918977a0afdc2e6bc562d8751b5d3b8c5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 12:45:22 +0200 Subject: netfilter: nf_tables: restore IP sanity checks for netdev/egress Subtract network offset to skb->len before performing IPv4 header sanity checks, then adjust transport offset from offset from mac header. Jorge Ortiz says: When small UDP packets (< 4 bytes payload) are sent from eth0, `meta l4proto udp` condition is not met because `NFT_PKTINFO_L4PROTO` is not set. 
This happens because there is a comparison that checks if the transport header offset exceeds the total length. This comparison does not take into account the fact that the skb network offset might be non-zero in egress mode (e.g., 14 bytes for Ethernet header). Fixes: 0ae8e4cca787 ("netfilter: nf_tables: set transport offset from mac header for netdev/egress") Reported-by: Jorge Ortiz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 60a7d0ce3080..fcf967286e37 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -19,7 +19,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) { struct iphdr *iph, _iph; - u32 len, thoff; + u32 len, thoff, skb_len; iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*iph), &_iph); @@ -30,8 +30,10 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; len = iph_totlen(pkt->skb, iph); - thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); - if (pkt->skb->len < len) + thoff = iph->ihl * 4; + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + + if (skb_len < len) return -1; else if (len < thoff) return -1; @@ -40,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; - pkt->thoff = thoff; + pkt->thoff = skb_network_offset(pkt->skb) + thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; return 0; -- cgit v1.2.3 From 1e9046e3a154608f63ce79edcb01e6afd6b10c7c Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 14 Aug 2024 17:35:55 +0200 Subject: rpmb: add Replay Protected Memory Block (RPMB) subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of storage technologies support a specialised hardware partition designed to be resistant to replay attacks. The underlying HW protocols differ but the operations are common. The RPMB partition cannot be accessed via standard block layer, but by a set of specific RPMB commands. Such a partition provides authenticated and replay protected access, hence suitable as a secure storage. The initial aim of this patch is to provide a simple RPMB driver interface which can be accessed by the optee driver to facilitate early RPMB access to OP-TEE OS (secure OS) during the boot time. A TEE device driver can claim the RPMB interface, for example, via rpmb_interface_register() or rpmb_dev_find_device(). The RPMB driver provides a callback to route RPMB frames to the RPMB device accessible via rpmb_route_frames(). The detailed operation of implementing the access is left to the TEE device driver itself. 
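For illustration only, here is an editor's sketch (not part of this patch) of how a consumer could locate an RPMB device and route frames with the interface added below; the match policy, buffer handling and error codes are assumptions:

/*
 * Illustrative sketch only: uses the API declared in the new
 * <linux/rpmb.h> below; the match callback and buffer contents are
 * hypothetical and not taken from this series.
 */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/rpmb.h>

static int example_match_any(struct device *dev, const void *data)
{
	return 1;	/* accept the first registered RPMB device */
}

static int example_rpmb_exchange(u8 *req, unsigned int req_len,
				 u8 *resp, unsigned int resp_len)
{
	struct rpmb_dev *rdev;
	int ret;

	/* Takes a reference on the returned device */
	rdev = rpmb_dev_find_device(NULL, NULL, example_match_any);
	if (!rdev)
		return -ENODEV;

	/* Route raw RPMB frames to the backing eMMC/UFS/NVMe device */
	ret = rpmb_route_frames(rdev, req, req_len, resp, resp_len);

	rpmb_dev_put(rdev);
	return ret;
}

A real TEE driver would match on device identity rather than accepting any device, and would typically keep the reference for the lifetime of its RPMB session.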
Signed-off-by: Tomas Winkler Signed-off-by: Alex Bennée Signed-off-by: Shyam Saini Signed-off-by: Jens Wiklander Reviewed-by: Linus Walleij Tested-by: Manuel Traut Reviewed-by: Ulf Hansson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240814153558.708365-2-jens.wiklander@linaro.org Signed-off-by: Ulf Hansson --- include/linux/rpmb.h | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/linux/rpmb.h (limited to 'include') diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h new file mode 100644 index 000000000000..cccda73eea4d --- /dev/null +++ b/include/linux/rpmb.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Intel Corp. All rights reserved + * Copyright (C) 2021-2022 Linaro Ltd + */ +#ifndef __RPMB_H__ +#define __RPMB_H__ + +#include +#include + +/** + * enum rpmb_type - type of underlying storage technology + * + * @RPMB_TYPE_EMMC : emmc (JESD84-B50.1) + * @RPMB_TYPE_UFS : UFS (JESD220) + * @RPMB_TYPE_NVME : NVM Express + */ +enum rpmb_type { + RPMB_TYPE_EMMC, + RPMB_TYPE_UFS, + RPMB_TYPE_NVME, +}; + +/** + * struct rpmb_descr - RPMB description provided by the underlying block device + * + * @type : block device type + * @route_frames : routes frames to and from the RPMB device + * @dev_id : unique device identifier read from the hardware + * @dev_id_len : length of unique device identifier + * @reliable_wr_count: number of sectors that can be written in one access + * @capacity : capacity of the device in units of 128K + * + * @dev_id is intended to be used as input when deriving the authenticaion key. + */ +struct rpmb_descr { + enum rpmb_type type; + int (*route_frames)(struct device *dev, u8 *req, unsigned int req_len, + u8 *resp, unsigned int resp_len); + u8 *dev_id; + size_t dev_id_len; + u16 reliable_wr_count; + u16 capacity; +}; + +/** + * struct rpmb_dev - device which can support RPMB partition + * + * @dev : device + * @id : device_id + * @list_node : linked list node + * @descr : RPMB description + */ +struct rpmb_dev { + struct device dev; + int id; + struct list_head list_node; + struct rpmb_descr descr; +}; + +#define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) + +#if IS_ENABLED(CONFIG_RPMB) +struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); +void rpmb_dev_put(struct rpmb_dev *rdev); +struct rpmb_dev *rpmb_dev_find_device(const void *data, + const struct rpmb_dev *start, + int (*match)(struct device *dev, + const void *data)); +int rpmb_interface_register(struct class_interface *intf); +void rpmb_interface_unregister(struct class_interface *intf); +struct rpmb_dev *rpmb_dev_register(struct device *dev, + struct rpmb_descr *descr); +int rpmb_dev_unregister(struct rpmb_dev *rdev); + +int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, unsigned int resp_len); + +#else +static inline struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev) +{ + return NULL; +} + +static inline void rpmb_dev_put(struct rpmb_dev *rdev) { } + +static inline struct rpmb_dev * +rpmb_dev_find_device(const void *data, const struct rpmb_dev *start, + int (*match)(struct device *dev, const void *data)) +{ + return NULL; +} + +static inline int rpmb_interface_register(struct class_interface *intf) +{ + return -EOPNOTSUPP; +} + +static inline void rpmb_interface_unregister(struct class_interface *intf) +{ +} + +static inline struct rpmb_dev * +rpmb_dev_register(struct device *dev, struct rpmb_descr *descr) +{ + return NULL; 
+} + +static inline int rpmb_dev_unregister(struct rpmb_dev *dev) +{ + return 0; +} + +static inline int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, + unsigned int resp_len) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_RPMB */ + +#endif /* __RPMB_H__ */ -- cgit v1.2.3 From c30b855e814d9094e369a19fbd86c9bb5badc154 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 14 Aug 2024 17:35:57 +0200 Subject: tee: add tee_device_set_dev_groups() Add tee_device_set_dev_groups() to TEE drivers to supply driver specific attribute groups. The class specific attributes are from now on added via the tee_class, which currently only consist of implementation_id. Signed-off-by: Jens Wiklander Reviewed-by: Sumit Garg Link: https://lore.kernel.org/r/20240814153558.708365-4-jens.wiklander@linaro.org Signed-off-by: Ulf Hansson --- include/linux/tee_core.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index efd16ed52315..a38494d6b5f4 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -154,6 +154,18 @@ int tee_device_register(struct tee_device *teedev); */ void tee_device_unregister(struct tee_device *teedev); +/** + * tee_device_set_dev_groups() - Set device attribute groups + * @teedev: Device to register + * @dev_groups: Attribute groups + * + * Assigns the provided @dev_groups to the @teedev to be registered later + * with tee_device_register(). Calling this function is optional, but if + * it's called it must be called before tee_device_register(). + */ +void tee_device_set_dev_groups(struct tee_device *teedev, + const struct attribute_group **dev_groups); + /** * tee_session_calc_client_uuid() - Calculates client UUID for session * @uuid: Resulting UUID -- cgit v1.2.3 From d3596c73011d1b0e9b43ae12f1e0f491d6bb96eb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 21 Aug 2024 15:24:06 +0200 Subject: mmc: core: Remove struct mmc_context_info The 'mmc_context_info' structure is unused. It has been introduced in: - commit 2220eedfd7ae ("mmc: fix async request mechanism for sequential read scenarios") in 2013-02 and its usages have been removed in: - commit 126b62700386 ("mmc: core: Remove code no longer needed after the switch to blk-mq") - commit 0fbfd1251830 ("mmc: block: Remove code no longer needed after the switch to blk-mq") in 2017-12. Now remove this unused structure. 
Signed-off-by: Christophe JAILLET Reviewed-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/232106a8a6a374dee25feea9b94498361568c10b.1724246389.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..f85df7d045ca 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -291,20 +291,6 @@ struct mmc_slot { void *handler_priv; }; -/** - * mmc_context_info - synchronization details for mmc context - * @is_done_rcv wake up reason was done request - * @is_new_req wake up reason was new request - * @is_waiting_last_req mmc context waiting for single running request - * @wait wait queue - */ -struct mmc_context_info { - bool is_done_rcv; - bool is_new_req; - bool is_waiting_last_req; - wait_queue_head_t wait; -}; - struct regulator; struct mmc_pwrseq; -- cgit v1.2.3 From f5e1638bf3c785600e2ab53e5532007af5296581 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sat, 24 Aug 2024 01:59:17 +0300 Subject: mmc: core: remove left-over data structure declarations The last users of 'enum mmc_blk_status' and 'struct mmc_async_req' were removed by commit 126b62700386 ("mmc: core: Remove code no longer needed after the switch to blk-mq") in 2017, remove these two left-over data structures. Signed-off-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20240823225917.2826156-1-vladimir.zapolskiy@linaro.org Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 12 ------------ include/linux/mmc/host.h | 10 ---------- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 2c7928a50907..f0ac2e469b32 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -11,18 +11,6 @@ struct mmc_data; struct mmc_request; -enum mmc_blk_status { - MMC_BLK_SUCCESS = 0, - MMC_BLK_PARTIAL, - MMC_BLK_CMD_ERR, - MMC_BLK_RETRY, - MMC_BLK_ABORT, - MMC_BLK_DATA_ERR, - MMC_BLK_ECC_ERR, - MMC_BLK_NOMEDIUM, - MMC_BLK_NEW_REQUEST, -}; - struct mmc_command { u32 opcode; u32 arg; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f85df7d045ca..ac01cd1622ef 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -264,16 +264,6 @@ struct mmc_cqe_ops { void (*cqe_recovery_finish)(struct mmc_host *host); }; -struct mmc_async_req { - /* active mmc request */ - struct mmc_request *mrq; - /* - * Check error status of completed mmc request. - * Returns 0 if success otherwise non zero. - */ - enum mmc_blk_status (*err_check)(struct mmc_card *, struct mmc_async_req *); -}; - /** * struct mmc_slot - MMC slot functions * -- cgit v1.2.3 From f2c6dbd220170c2396fb019ead67fbada1e23ebd Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 16 Aug 2024 12:51:22 +0800 Subject: kunit: Device wrappers should also manage driver name kunit_driver_create() accepts a name for the driver, but does not copy it, so if that name is either on the stack, or otherwise freed, we end up with a use-after-free when the driver is cleaned up. Instead, strdup() the name, and manage it as another KUnit allocation. As there was no existing kunit_kstrdup(), we add one. Further, add a kunit_ variant of strdup_const() and kfree_const(), so we don't need to allocate and manage the string in the majority of cases where it's a constant. However, these are inline functions, and is_kernel_rodata() only works for built-in code. 
This causes problems in two cases: - If kunit is built as a module, __{start,end}_rodata is not defined. - If a kunit test using these functions is built as a module, it will suffer the same fate. This fixes a KASAN splat with overflow.overflow_allocation_test, when built as a module. Restrict the is_kernel_rodata() case to when KUnit is built as a module, which fixes the first case, at the cost of losing the optimisation. Also, make kunit_{kstrdup,kfree}_const non-inline, so that other modules using them will not accidentally depend on is_kernel_rodata(). If KUnit is built-in, they'll benefit from the optimisation, if KUnit is not, they won't, but the string will be properly duplicated. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Reported-by: Nico Pache Closes: https://groups.google.com/g/kunit-dev/c/81V9b9QYON0 Reviewed-by: Kees Cook Reviewed-by: Maxime Ripard Reviewed-by: Rae Moar Signed-off-by: David Gow Tested-by: Rae Moar Signed-off-by: Shuah Khan --- include/kunit/test.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index e2a1f0928e8b..5ac237c949a0 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -28,6 +28,7 @@ #include #include +#include /* Static key: true if any KUnit tests are currently running */ DECLARE_STATIC_KEY_FALSE(kunit_running); @@ -480,6 +481,53 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } + +/** + * kunit_kfree_const() - conditionally free test managed memory + * @x: pointer to the memory + * + * Calls kunit_kfree() only if @x is not in .rodata section. + * See kunit_kstrdup_const() for more information. + */ +void kunit_kfree_const(struct kunit *test, const void *x); + +/** + * kunit_kstrdup() - Duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * See kstrdup() and kunit_kmalloc_array() for more information. + */ +static inline char *kunit_kstrdup(struct kunit *test, const char *str, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!str) + return NULL; + + len = strlen(str) + 1; + buf = kunit_kmalloc(test, len, gfp); + if (buf) + memcpy(buf, str, len); + return buf; +} + +/** + * kunit_kstrdup_const() - Conditionally duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * Calls kunit_kstrdup() only if @str is not in the rodata section. Must be freed with + * kunit_kfree_const() -- not kunit_kfree(). + * See kstrdup_const() and kunit_kmalloc_array() for more information. + */ +const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp); + /** * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area * @test: The test context object. -- cgit v1.2.3 From 8d3cefaf659265aa82b0373a563fdb9d16a2b947 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 15 Aug 2024 21:44:50 +0200 Subject: i2c: core: Lock address during client device instantiation Krzysztof reported an issue [0] which is caused by parallel attempts to instantiate the same I2C client device. This can happen if driver supports auto-detection, but certain devices are also instantiated explicitly. 
The original change isn't actually wrong, it just revealed that I2C core isn't prepared yet to handle this scenario. Calls to i2c_new_client_device() can be nested, therefore we can't use a simple mutex here. Parallel instantiation of devices at different addresses is ok, so we just have to prevent parallel instantiation at the same address. We can use a bitmap with one bit per 7-bit I2C client address, and atomic bit operations to set/check/clear bits. Now a parallel attempt to instantiate a device at the same address will result in -EBUSY being returned, avoiding the "sysfs: cannot create duplicate filename" splash. Note: This patch version includes small cosmetic changes to the Tested-by version, only functional change is that address locking is supported for slave addresses too. [0] https://lore.kernel.org/linux-i2c/9479fe4e-eb0c-407e-84c0-bd60c15baf74@ans.pl/T/#m12706546e8e2414d8f1a0dc61c53393f731685cc Fixes: caba40ec3531 ("eeprom: at24: Probe for DDR3 thermal sensor in the SPD case") Cc: stable@vger.kernel.org Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Heiner Kallweit Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..8d79440cd8ce 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -761,6 +761,9 @@ struct i2c_adapter { struct regulator *bus_regulator; struct dentry *debugfs; + + /* 7bit address space */ + DECLARE_BITMAP(addrs_in_instantiation, 1 << 7); }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit v1.2.3 From af8a6e65f42c6c89414017cc4d78403e83056172 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 3 May 2024 12:10:52 +0200 Subject: USB: serial: set driver owner when registering drivers Modules registering drivers with usb_serial_register_drivers() might forget to set .owner field. The field is used by some of other kernel parts for reference counting (try_module_get()), so it is expected that drivers will set it. Solve the problem by moving this task away from the drivers to the core USB serial code, just like we did for platform_driver in commit 9447057eaff8 ("platform_device: use a macro instead of platform_driver_register"). 
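As a rough sketch from the editor (not part of the patch), a minimal driver now registers like this and no longer has to set .driver.owner itself; the device ID and names are placeholders:

/*
 * Hypothetical minimal usb-serial driver; VID/PID and names are made up.
 * The usb_serial_register_drivers() macro now supplies THIS_MODULE, so
 * .driver.owner does not need to be set here.
 */
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id example_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },	/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, example_id_table);

static struct usb_serial_driver example_device = {
	.driver		= { .name = "example_serial" },
	.id_table	= example_id_table,
	.num_ports	= 1,
};

static struct usb_serial_driver * const example_drivers[] = {
	&example_device, NULL
};

static int __init example_init(void)
{
	/* Expands to __usb_serial_register_drivers(..., THIS_MODULE, ...) */
	return usb_serial_register_drivers(example_drivers, "example_serial",
					   example_id_table);
}
module_init(example_init);

static void __exit example_exit(void)
{
	usb_serial_deregister_drivers(example_drivers);
}
module_exit(example_exit);

MODULE_LICENSE("GPL");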
Signed-off-by: Krzysztof Kozlowski [ johan: amend commit summary ] Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 1a0a4dc87980..75b2b763f1ba 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -311,8 +311,11 @@ struct usb_serial_driver { #define to_usb_serial_driver(d) \ container_of(d, struct usb_serial_driver, driver) -int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], - const char *name, const struct usb_device_id *id_table); +#define usb_serial_register_drivers(serial_drivers, name, id_table) \ + __usb_serial_register_drivers(serial_drivers, THIS_MODULE, name, id_table) +int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], + struct module *owner, const char *name, + const struct usb_device_id *id_table); void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]); void usb_serial_port_softint(struct usb_serial_port *port); -- cgit v1.2.3 From 5fd3e2412ade67ea20d855f0aea821c650d27559 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 19 Jun 2024 03:02:42 +0300 Subject: media: v4l2-subdev: Support hybrid links in v4l2_subdev_link_validate() The v4l2_subdev_link_validate() helper function is meant to be used as a drop-in implementation of a V4L2 subdev entity .link_validate() handler. It supports subdev-to-subdev links only, and complains if one end of the link is not a subdev. This forces drivers that have video output devices connected to subdevs to implement a custom .link_validate() handler, calling v4l2_subdev_link_validate() for the subdev-to-subdev links, and performing manual link validation for the video-to-subdev links. Video devices embed a media entity, and therefore also have a .link_validate() operation. For video capture devices, the operation should be manually implemented by drivers for validate the subdev-to-video links. For video output devices, on the other hand, that operation is never called, as link validation is performed in the context of the sink entity. As a result, we end up forcing drivers to implement a custom .link_validate() handler for subdevs connected to video output devices, when the video devices provide an operation that could be used for that purpose. To improve that situation, make v4l2_subdev_link_validate() delegate link validation to the source's .link_validate() operation when the link source is a video device and the link sink is a subdev. This allows broader usage of v4l2_subdev_link_validate(), and simplifies drivers by making video device link validation easy to implement in the video device .link_validate(), regardless of whether the video device is an output device or a capture device. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Acked-by: Sakari Ailus --- include/media/v4l2-subdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index bd235d325ff9..8daa0929865c 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1250,6 +1250,12 @@ int v4l2_subdev_link_validate_default(struct v4l2_subdev *sd, * calls v4l2_subdev_link_validate_default() to ensure that * width, height and the media bus pixel code are equal on both * source and sink of the link. 
+ * + * The function can be used as a drop-in &media_entity_ops.link_validate + * implementation for v4l2_subdev instances. It supports all links between + * subdevs, as well as links between subdevs and video devices, provided that + * the video devices also implement their &media_entity_ops.link_validate + * operation. */ int v4l2_subdev_link_validate(struct media_link *link); -- cgit v1.2.3 From 67733d7a71503fd3e32eeada371f8aa2516c5c95 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi --- include/drm/intel/i915_pciids.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit v1.2.3 From cf4d37b8157ca085c17fdc1faad737465ff311b9 Mon Sep 17 00:00:00 2001 From: renjun wang Date: Sat, 24 Aug 2024 16:20:14 +0800 Subject: drm/atomic: fix kerneldoc for fake_commit field According to the context, the function description for fake_commit should be "prevent the atomic states from being freed too early" Signed-off-by: renjun wang Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/tencent_6EF2603DCCFAD6A8265F8AAD9D6D5BCB9309@qq.com --- include/drm/drm_atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 4d7f4c5f2001..31ca88deb10d 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -460,7 +460,7 @@ struct drm_atomic_state { * * Used for signaling unbound planes/connectors. * When a connector or plane is not bound to any CRTC, it's still important - * to preserve linearity to prevent the atomic states from being freed to early. 
+ * to preserve linearity to prevent the atomic states from being freed too early. * * This commit (if set) is not bound to any CRTC, but will be completed when * drm_atomic_helper_commit_hw_done() is called. -- cgit v1.2.3 From 1d4684fbe88dc28e2bf79f5e94a432f0469d2dac Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 2 Aug 2024 17:32:02 -0700 Subject: iommufd: Reorder include files Reorder include files to alphabetic order to simplify maintenance, and separate local headers and global headers with a blank line. No functional change intended. Link: https://patch.msgid.link/r/7524b037cc05afe19db3c18f863253e1d1554fa2.1722644866.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 4 ++-- include/uapi/linux/iommufd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ffc3a949f837..c2f2f6b9148e 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -6,9 +6,9 @@ #ifndef __LINUX_IOMMUFD_H #define __LINUX_IOMMUFD_H -#include -#include #include +#include +#include struct device; struct iommufd_device; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4dde745cfb7e..72010f71c5e4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_H #define _UAPI_IOMMUFD_H -#include #include +#include #define IOMMUFD_TYPE (';') -- cgit v1.2.3 From 45c4d994b82b08f0ce5eb50f8da29379c92a391e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Fri, 23 Aug 2024 18:30:47 +0200 Subject: accel: Use XArray instead of IDR for minors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accel minor management is based on DRM (and is also using struct drm_minor internally), since DRM is using XArray for minors, it makes sense to also convert accel. As the two implementations are identical (only difference being the underlying xarray), move the accel_minor_* functionality to DRM. 
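For illustration (editor's sketch, not part of this patch), an open() path can now resolve an accel minor through the shared XArray helpers exported below; the function name and the per-file setup are assumptions:

/*
 * Illustrative only: resolves a minor via the XArray-based helpers that
 * this patch exposes through <drm/drm_accel.h> and <drm/drm_file.h>.
 * The surrounding open() logic is hypothetical.
 */
#include <linux/err.h>
#include <linux/fs.h>
#include <drm/drm_accel.h>
#include <drm/drm_file.h>

static int example_accel_open(struct inode *inode, struct file *filp)
{
	struct drm_minor *minor;
	int ret = 0;

	/* Looks up the minor in accel_minors_xa and takes a reference */
	minor = drm_minor_acquire(&accel_minors_xa, iminor(inode));
	if (IS_ERR(minor))
		return PTR_ERR(minor);

	/* ... per-file setup against minor->dev would go here ... */

	drm_minor_release(minor);
	return ret;
}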
Signed-off-by: Michał Winiarski Acked-by: James Zhu Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823163048.2676257-3-michal.winiarski@intel.com Signed-off-by: Christian König --- include/drm/drm_accel.h | 18 ++---------------- include/drm/drm_file.h | 5 +++++ 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index f4d3784b1dce..8867ce0be94c 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -51,11 +51,10 @@ #if IS_ENABLED(CONFIG_DRM_ACCEL) +extern struct xarray accel_minors_xa; + void accel_core_exit(void); int accel_core_init(void); -void accel_minor_remove(int index); -int accel_minor_alloc(void); -void accel_minor_replace(struct drm_minor *minor, int index); void accel_set_device_instance_params(struct device *kdev, int index); int accel_open(struct inode *inode, struct file *filp); void accel_debugfs_init(struct drm_device *dev); @@ -73,19 +72,6 @@ static inline int __init accel_core_init(void) return 0; } -static inline void accel_minor_remove(int index) -{ -} - -static inline int accel_minor_alloc(void) -{ - return -EOPNOTSUPP; -} - -static inline void accel_minor_replace(struct drm_minor *minor, int index) -{ -} - static inline void accel_set_device_instance_params(struct device *kdev, int index) { } diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index ab230d3af138..8c0030c77308 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -45,6 +45,8 @@ struct drm_printer; struct device; struct file; +extern struct xarray drm_minors_xa; + /* * FIXME: Not sure we want to have drm_minor here in the end, but to avoid * header include loops we need it here for now. @@ -434,6 +436,9 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) void drm_file_update_pid(struct drm_file *); +struct drm_minor *drm_minor_acquire(struct xarray *minors_xa, unsigned int minor_id); +void drm_minor_release(struct drm_minor *minor); + int drm_open(struct inode *inode, struct file *filp); int drm_open_helper(struct file *filp, struct drm_minor *minor); ssize_t drm_read(struct file *filp, char __user *buffer, -- cgit v1.2.3 From 03c3d7c74371a46d967fbf41628874ec04ddda96 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 15 Aug 2024 22:11:31 +0200 Subject: nvme-rdma: send cntlid in the RDMA_CM_REQUEST Private Data When sending a RDMA_CM_REQUEST, the NVMe RDMA Transport Specification allows you to populate the cntlid field in the RDMA_CM_REQUEST Private Data. The cntlid is returned by the target on completion of the first RDMA_CM_REQUEST command (which creates the admin queue). The cntlid field can then be populated by the host when the I/O queues are created (using additional RDMA_CM_REQUEST commands), such that the target can perform extra validation for additional RDMA_CM_REQUEST commands. This additional error code and error message is also added, such that nvme_rdma_cm_msg() will display the proper error message if the target fails the RDMA_CM_REQUEST command because of this extra validation. 
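As a hedged sketch from the editor (not part of the patch), the host side could populate the RDMA_CM_REQUEST private data for an I/O queue roughly as follows; the helper name and the queue-size handling are placeholders:

/*
 * Illustrative only: fills struct nvme_rdma_cm_req with the cntlid that
 * the target returned when the admin queue (qid 0) was created. The
 * helper name and sizing are assumptions, not this patch's code.
 */
#include <asm/byteorder.h>
#include <linux/nvme-rdma.h>

static void example_fill_cm_req(struct nvme_rdma_cm_req *priv, u16 qid,
				u16 queue_size, u16 cntlid)
{
	priv->recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
	priv->qid = cpu_to_le16(qid);
	priv->hrqsize = cpu_to_le16(queue_size);
	priv->hsqsize = cpu_to_le16(queue_size - 1);
	/* New field: only meaningful once qid 0 has returned a cntlid */
	if (qid)
		priv->cntlid = cpu_to_le16(cntlid);
}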
Signed-off-by: Niklas Cassel Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme-rdma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index eb2f04d636c8..97c5f00b9aa3 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -25,6 +25,7 @@ enum nvme_rdma_cm_status { NVME_RDMA_CM_NO_RSC = 0x06, NVME_RDMA_CM_INVALID_IRD = 0x07, NVME_RDMA_CM_INVALID_ORD = 0x08, + NVME_RDMA_CM_INVALID_CNTLID = 0x09, }; static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) @@ -46,6 +47,8 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) return "invalid IRD"; case NVME_RDMA_CM_INVALID_ORD: return "Invalid ORD"; + case NVME_RDMA_CM_INVALID_CNTLID: + return "invalid controller ID"; default: return "unrecognized reason"; } @@ -64,7 +67,8 @@ struct nvme_rdma_cm_req { __le16 qid; __le16 hrqsize; __le16 hsqsize; - u8 rsvd[24]; + __le16 cntlid; + u8 rsvd[22]; }; /** -- cgit v1.2.3 From abcd3026dd63417692a5e80aff70e7cd9b5c14ea Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Aug 2024 14:07:01 +0200 Subject: ethtool: Extend cable testing interface with result source information Extend the ethtool netlink cable testing interface by adding support for specifying the source of cable testing results. This allows users to differentiate between results obtained through different diagnostic methods. For example, some TI 10BaseT1L PHYs provide two variants of cable diagnostics: Time Domain Reflectometry (TDR) and Active Link Cable Diagnostic (ALCD). By introducing `ETHTOOL_A_CABLE_RESULT_SRC` and `ETHTOOL_A_CABLE_FAULT_LENGTH_SRC` attributes, this update enables drivers to indicate whether the result was derived from TDR or ALCD, improving the clarity and utility of diagnostic information. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240822120703.1393130-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 45d8bcdea056..283305f6b063 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -573,10 +573,20 @@ enum { ETHTOOL_A_CABLE_PAIR_D, }; +/* Information source for specific results. 
*/ +enum { + ETHTOOL_A_CABLE_INF_SRC_UNSPEC, + /* Results provided by the Time Domain Reflectometry (TDR) */ + ETHTOOL_A_CABLE_INF_SRC_TDR, + /* Results provided by the Active Link Cable Diagnostic (ALCD) */ + ETHTOOL_A_CABLE_INF_SRC_ALCD, +}; + enum { ETHTOOL_A_CABLE_RESULT_UNSPEC, ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ __ETHTOOL_A_CABLE_RESULT_CNT, ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) @@ -586,6 +596,7 @@ enum { ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) -- cgit v1.2.3 From 4715d87e11ac805ab95a75adfbf93d67dfbdbe81 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Aug 2024 14:07:02 +0200 Subject: ethtool: Add support for specifying information source in cable test results Enhance the ethtool cable test interface by introducing the ability to specify the source of the diagnostic information for cable test results. This is particularly useful for PHYs that offer multiple diagnostic methods, such as Time Domain Reflectometry (TDR) and Active Link Cable Diagnostic (ALCD). Key changes: - Added `ethnl_cable_test_result_with_src` and `ethnl_cable_test_fault_length_with_src` functions to allow specifying the information source when reporting cable test results. - Updated existing `ethnl_cable_test_result` and `ethnl_cable_test_fault_length` functions to use TDR as the default source, ensuring backward compatibility. - Modified the UAPI to support these new attributes, enabling drivers to provide more detailed diagnostic information. 
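For illustration only (editor's sketch, not from the patch), a PHY driver whose diagnostics come from ALCD could report a result and fault length like this; the pair, result code and distance are placeholders:

/*
 * Illustrative only: reports an ALCD-derived cable test result using the
 * new *_with_src() helpers; the concrete values are made up.
 */
#include <linux/ethtool_netlink.h>
#include <linux/phy.h>

static int example_report_alcd(struct phy_device *phydev)
{
	int ret;

	ret = ethnl_cable_test_result_with_src(phydev, ETHTOOL_A_CABLE_PAIR_A,
					       ETHTOOL_A_CABLE_RESULT_CODE_OPEN,
					       ETHTOOL_A_CABLE_INF_SRC_ALCD);
	if (ret)
		return ret;

	/* Distance to the fault in centimeters, as measured by ALCD */
	return ethnl_cable_test_fault_length_with_src(phydev,
						      ETHTOOL_A_CABLE_PAIR_A,
						      1500,
						      ETHTOOL_A_CABLE_INF_SRC_ALCD);
}

Drivers whose results come from TDR can keep calling the unchanged ethnl_cable_test_result() and ethnl_cable_test_fault_length() wrappers, which default the source to ETHTOOL_A_CABLE_INF_SRC_TDR.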
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240822120703.1393130-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index fae0dfb9a9c8..aba91335273a 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -23,8 +23,10 @@ struct phy_device; int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd); void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); -int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); -int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); +int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair, + u8 result, u32 src); +int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair, + u32 cm, u32 src); int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV); int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV); int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, @@ -54,14 +56,14 @@ static inline void ethnl_cable_test_free(struct phy_device *phydev) static inline void ethnl_cable_test_finished(struct phy_device *phydev) { } -static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, - u8 result) +static inline int ethnl_cable_test_result_with_src(struct phy_device *phydev, + u8 pair, u8 result, u32 src) { return -EOPNOTSUPP; } -static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, - u8 pair, u32 cm) +static inline int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, + u8 pair, u32 cm, u32 src) { return -EOPNOTSUPP; } @@ -119,4 +121,19 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev) } #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ + +static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, + u8 result) +{ + return ethnl_cable_test_result_with_src(phydev, pair, result, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + +static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, + u8 pair, u32 cm) +{ + return ethnl_cable_test_fault_length_with_src(phydev, pair, cm, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ -- cgit v1.2.3 From d24dac8eb8111c401b0de40a17760a0a254bcffc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:22 +0100 Subject: packet: Correct spelling in if_packet.h Correct spelling in if_packet.h As reported by codespell. Signed-off-by: Simon Horman Acked-by: Willem de Bruijn Link: https://patch.msgid.link/20240822-net-spell-v1-1-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_packet.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 9efc42382fdb..1d2718dd9647 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -230,8 +230,8 @@ struct tpacket_hdr_v1 { * ts_first_pkt: * Is always the time-stamp when the block was opened. * Case a) ZERO packets - * No packets to deal with but atleast you know the - * time-interval of this block. + * No packets to deal with but at least you know + * the time-interval of this block. 
* Case b) Non-zero packets * Use the ts of the first packet in the block. * @@ -265,7 +265,8 @@ enum tpacket_versions { - struct tpacket_hdr - pad to TPACKET_ALIGNMENT=16 - struct sockaddr_ll - - Gap, chosen so that packet data (Start+tp_net) alignes to TPACKET_ALIGNMENT=16 + - Gap, chosen so that packet data (Start+tp_net) aligns to + TPACKET_ALIGNMENT=16 - Start+tp_mac: [ Optional MAC header ] - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16. - Pad to align to TPACKET_ALIGNMENT=16 -- cgit v1.2.3 From c3494460324832ba03ab8f640a0a5e52283b1b15 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:23 +0100 Subject: s390/iucv: Correct spelling in iucv.h Correct spelling in iucv.h As reported by codespell. Cc: Alexandra Winter Cc: Thorsten Winkler Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-2-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/iucv/iucv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 4d114e6d6d23..dd9e93c12260 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -15,7 +15,7 @@ * To explore any of the IUCV functions, one must first register their * program using iucv_register(). Once your program has successfully * completed a register, it can exploit the other functions. - * For furthur reference on all IUCV functionality, refer to the + * For further reference on all IUCV functionality, refer to the * CP Programming Services book, also available on the web thru * www.vm.ibm.com/pubs, manual # SC24-6084 * -- cgit v1.2.3 From d0193b167f2756de2f8be4d44186294bfacaaafc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:24 +0100 Subject: ip_tunnel: Correct spelling in ip_tunnels.h Correct spelling in ip_tunnels.h As reported by codespell. Cc: David Ahern Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-3-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1db2417b8ff5..6194fbb564c6 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -573,7 +573,7 @@ static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, return 0; } -/* Propogate ECN bits out */ +/* Propagate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) { -- cgit v1.2.3 From 507285b7f9b2bc90f093fa335d43f2a21a2dbef9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:25 +0100 Subject: ipv6: Correct spelling in ipv6.h Correct spelling in ip_tunnels.h As reported by codespell. 
Cc: David Ahern Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-4-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e7113855a10f..248bfb26e2af 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -851,7 +851,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int * we should *never* get to this point since that * would mean the addrs are equal * - * However, we do get to it 8) And exacly, when + * However, we do get to it 8) And exactly, when * addresses are equal 8) * * ip route add 1111::/128 via ... @@ -973,7 +973,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash = skb_get_hash_flowi6(skb, fl6); /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an + * to minimize possibility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. */ hash = rol32(hash, 16); -- cgit v1.2.3 From e8ac2dba93eafb928fd9e127cdade3b2216a642c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:26 +0100 Subject: bonding: Correct spelling in headers Correct spelling in bond_3ad.h and bond_alb.h. As reported by codespell. Cc: Jay Vosburgh Cc: Andy Gospodarek Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-5-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/bond_3ad.h | 5 ++++- include/net/bond_alb.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 9ce5ac2bfbad..2053cd8e788a 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -231,7 +231,10 @@ typedef struct port { mux_states_t sm_mux_state; /* state machine mux state */ u16 sm_mux_timer_counter; /* state machine mux timer counter */ tx_states_t sm_tx_state; /* state machine tx state */ - u16 sm_tx_timer_counter; /* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */ + u16 sm_tx_timer_counter; /* state machine tx timer counter + * (always on - enter to transmit + * state 3 time per second) + */ u16 sm_churn_actor_timer_counter; u16 sm_churn_partner_timer_counter; u32 churn_actor_count; diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index 9dc082b2d543..e5945427f38d 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -53,7 +53,7 @@ struct slave; struct tlb_client_info { - struct slave *tx_slave; /* A pointer to slave used for transmiting + struct slave *tx_slave; /* A pointer to slave used for transmitting * packets to a Client that the Hash function * gave this entry index. */ -- cgit v1.2.3 From 19f1f11c9a8e92a2211a3854bd2cfbd4bb6303f1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:27 +0100 Subject: net: qualcomm: rmnet: Correct spelling in if_rmnet.h Correct spelling in if_rmnet.h As reported by codespell. 
Cc: Sean Tranchetti Signed-off-by: Simon Horman Reviewed-by: Subash Abhinov Kasiviswanathan Link: https://patch.msgid.link/20240822-net-spell-v1-6-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/if_rmnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 839d1e48b85e..c44bf6e80ecb 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -42,7 +42,7 @@ struct rmnet_map_ul_csum_header { /* csum_info field: * OFFSET: where (offset in bytes) to insert computed checksum - * UDP: 1 = UDP checksum (zero checkum means no checksum) + * UDP: 1 = UDP checksum (zero checksum means no checksum) * ENABLED: 1 = checksum computation requested */ #define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0) -- cgit v1.2.3 From 6899c2549cf7ca554ee2dd8574f0b70945f3ed1b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:28 +0100 Subject: netlabel: Correct spelling in netlabel.h Correct spelling in netlabel.h. As reported by codespell. Cc: Paul Moore Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-7-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netlabel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 654bc777d2a7..529160f76cac 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -30,7 +30,7 @@ struct calipso_doi; /* * NetLabel - A management interface for maintaining network packet label - * mapping tables for explicit packet labling protocols. + * mapping tables for explicit packet labeling protocols. * * Network protocols such as CIPSO and RIPSO require a label translation layer * to convert the label on the packet into something meaningful on the host -- cgit v1.2.3 From 10d0749a38c3a02f308fe8c4f4ed29bd6412e5fb Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:29 +0100 Subject: NFC: Correct spelling in headers Correct spelling in NFC headers. As reported by codespell. Signed-off-by: Simon Horman Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240822-net-spell-v1-8-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/nfc/nci.h | 2 +- include/net/nfc/nfc.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index e82f55f543bb..dc36519d16aa 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -332,7 +332,7 @@ struct nci_core_init_rsp_1 { __le32 nfcc_features; __u8 num_supported_rf_interfaces; __u8 supported_rf_interfaces[]; /* variable size array */ - /* continuted in nci_core_init_rsp_2 */ + /* continued in nci_core_init_rsp_2 */ } __packed; struct nci_core_init_rsp_2 { diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3d07abacf08b..3a3781838c67 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -80,7 +80,7 @@ struct nfc_ops { #define NFC_ATR_REQ_GT_OFFSET 14 /** - * struct nfc_target - NFC target descriptiom + * struct nfc_target - NFC target description * * @sens_res: 2 bytes describing the target SENS_RES response, if the target * is a type A one. 
The %sens_res most significant byte must be byte 2 @@ -230,10 +230,10 @@ static inline void nfc_set_parent_dev(struct nfc_dev *nfc_dev, } /** - * nfc_set_drvdata - set driver specifc data + * nfc_set_drvdata - set driver specific data * * @dev: The nfc device - * @data: Pointer to driver specifc data + * @data: Pointer to driver specific data */ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) { @@ -241,7 +241,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) } /** - * nfc_get_drvdata - get driver specifc data + * nfc_get_drvdata - get driver specific data * * @dev: The nfc device */ -- cgit v1.2.3 From a7a45f02a093d3b3f80a77db334703b5f53ebd44 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:30 +0100 Subject: net: sched: Correct spelling in headers Correct spelling in pkt_cls.h and red.h. As reported by codespell. Cc: Krzysztof Kozlowski Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-9-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 2 +- include/net/red.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41297bd38dff..4880b3a7aced 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -491,7 +491,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); /** - * tcf_em_tree_match - evaulate an ematch tree + * tcf_em_tree_match - evaluate an ematch tree * * @skb: socket buffer of the packet in question * @tree: ematch tree to be used for evaluation diff --git a/include/net/red.h b/include/net/red.h index 802287d52c9e..159a09359fc0 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -40,7 +40,7 @@ max_P should be small (not 1), usually 0.01..0.02 is good value. max_P is chosen as a number, so that max_P/(th_max-th_min) - is a negative power of two in order arithmetics to contain + is a negative power of two in order arithmetic to contain only shifts. @@ -159,7 +159,7 @@ static inline u32 red_maxp(u8 Plog) static inline void red_set_vars(struct red_vars *v) { /* Reset average queue length, the value is strictly bound - * to the parameters below, reseting hurts a bit but leaving + * to the parameters below, resetting hurts a bit but leaving * it might result in an unreasonable qavg for a while. --TGR */ v->qavg = 0; @@ -340,7 +340,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p { /* * NOTE: v->qavg is fixed point number with point at Wlog. - * The formula below is equvalent to floating point + * The formula below is equivalent to floating point * version: * * qavg = qavg*(1-W) + backlog*W; @@ -375,7 +375,7 @@ static inline int red_mark_probability(const struct red_parms *p, OK. qR is random number in the interval (0..1/max_P)*(qth_max-qth_min) i.e. 0..(2^Plog). If we used floating point - arithmetics, it would be: (2^Plog)*rnd_num, + arithmetic, it would be: (2^Plog)*rnd_num, where rnd_num is less 1. Taking into account, that qavg have fixed -- cgit v1.2.3 From 7f47fcea8c6b4af25e72e0ebb8d492a181b7ce03 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:31 +0100 Subject: sctp: Correct spelling in headers Correct spelling in sctp.h and structs.h. As reported by codespell. 
Cc: Marcelo Ricardo Leitner Signed-off-by: Simon Horman Acked-by: Xin Long Link: https://patch.msgid.link/20240822-net-spell-v1-10-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 2 +- include/net/sctp/structs.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a2310fa995f6..84e6b9fd5610 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -28,7 +28,7 @@ #define __net_sctp_h__ /* Header Strategy. - * Start getting some control over the header file depencies: + * Start getting some control over the header file dependencies: * includes * constants * structs diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f24a1bbcb3ef..31248cfdfb23 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -521,7 +521,7 @@ struct sctp_datamsg { refcount_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; - /* Did the messenge fail to send? */ + /* Did the message fail to send? */ int send_error; u8 send_failed:1, can_delay:1, /* should this message be Nagle delayed */ @@ -792,7 +792,7 @@ struct sctp_transport { */ hb_sent:1, - /* Is the Path MTU update pending on this tranport */ + /* Is the Path MTU update pending on this transport */ pmtu_pending:1, dst_pending_confirm:1, /* need to confirm neighbour */ @@ -1223,7 +1223,7 @@ enum sctp_endpoint_type { }; /* - * A common base class to bridge the implmentation view of a + * A common base class to bridge the implementation view of a * socket (usually listening) endpoint versus an association's * local endpoint. * This common structure is useful for several purposes: @@ -1353,7 +1353,7 @@ struct sctp_endpoint { struct rcu_head rcu; }; -/* Recover the outter endpoint structure. */ +/* Recover the outer endpoint structure. */ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) { struct sctp_endpoint *ep; @@ -1906,7 +1906,7 @@ struct sctp_association { __u32 rwnd_over; /* Keeps treack of rwnd pressure. This happens when we have - * a window, but not recevie buffer (i.e small packets). This one + * a window, but not receive buffer (i.e small packets). This one * is releases slowly (1 PMTU at a time ). */ __u32 rwnd_press; @@ -1994,7 +1994,7 @@ struct sctp_association { /* ADDIP Section 5.2 Upon reception of an ASCONF Chunk. * - * This is needed to implement itmes E1 - E4 of the updated + * This is needed to implement items E1 - E4 of the updated * spec. Here is the justification: * * Since the peer may bundle multiple ASCONF chunks toward us, @@ -2005,7 +2005,7 @@ struct sctp_association { /* These ASCONF chunks are waiting to be sent. * - * These chunaks can't be pushed to outqueue until receiving + * These chunks can't be pushed to outqueue until receiving * ASCONF_ACK for the previous ASCONF indicated by * addip_last_asconf, so as to guarantee that only one ASCONF * is in flight at any time. @@ -2059,13 +2059,13 @@ struct sctp_association { struct sctp_transport *new_transport; /* SCTP AUTH: list of the endpoint shared keys. 
These - * keys are provided out of band by the user applicaton + * keys are provided out of band by the user application * and can't change during the lifetime of the association */ struct list_head endpoint_shared_keys; /* SCTP AUTH: - * The current generated assocaition shared key (secret) + * The current generated association shared key (secret) */ struct sctp_auth_bytes *asoc_shared_key; struct sctp_shared_key *shkey; @@ -2121,7 +2121,7 @@ enum { SCTP_ASSOC_EYECATCHER = 0xa550c123, }; -/* Recover the outter association structure. */ +/* Recover the outer association structure. */ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) { struct sctp_association *asoc; -- cgit v1.2.3 From 01d86846a5a5f34662679dd229a0167fc9d92382 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:32 +0100 Subject: x25: Correct spelling in x25.h Correct spelling in x25.h As reported by codespell. Signed-off-by: Simon Horman Reviewed-by: Martin Schiller Link: https://patch.msgid.link/20240822-net-spell-v1-11-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/net/x25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index 597eb53c471e..5e833cfc864e 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -81,7 +81,7 @@ enum { #define X25_DEFAULT_WINDOW_SIZE 2 /* Default Window Size */ #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ -#define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */ +#define X25_DEFAULT_THROUGHPUT 0x0A /* Default Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ #define X25_SMODULUS 8 -- cgit v1.2.3 From 70d0bb45fae87a3b08970a318e15f317446a1956 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:33 +0100 Subject: net: Correct spelling in headers Correct spelling in Networking headers. As reported by codespell. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-12-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 2 +- include/linux/netdevice.h | 8 ++++---- include/net/addrconf.h | 2 +- include/net/busy_poll.h | 2 +- include/net/caif/caif_layer.h | 4 ++-- include/net/caif/cfpkt.h | 2 +- include/net/dropreason-core.h | 6 +++--- include/net/dst.h | 2 +- include/net/dst_cache.h | 2 +- include/net/erspan.h | 4 ++-- include/net/hwbm.h | 4 ++-- include/net/llc_pdu.h | 2 +- include/net/netlink.h | 16 ++++++++-------- include/net/netns/sctp.h | 4 ++-- include/net/regulatory.h | 2 +- include/net/sock.h | 4 ++-- include/net/udp.h | 2 +- include/uapi/linux/in.h | 2 +- include/uapi/linux/inet_diag.h | 2 +- 19 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0ed47d00549b..30114c25ad12 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -645,7 +645,7 @@ static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb) } /** - * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * eth_skb_pad - Pad buffer to minimum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. 
This function diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dfa836d9dbf..fce70990b209 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1237,7 +1237,7 @@ struct netdev_net_notifier { * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) - * Deletes the FDB entry from dev coresponding to addr. + * Deletes the FDB entry from dev corresponding to addr. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -3514,7 +3514,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, dql_completed(&dev_queue->dql, bytes); /* - * Without the memory barrier there is a small possiblity that + * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ @@ -4583,7 +4583,7 @@ void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** - * __dev_uc_sync - Synchonize device's unicast list + * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed @@ -4627,7 +4627,7 @@ void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** - * __dev_mc_sync - Synchonize device's multicast list + * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c8ed31828db3..363dd63babe7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -333,7 +333,7 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device - * @skb: skb for original incoming interface if neeeded + * @skb: skb for original incoming interface if needed * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..5e3b5703e4ab 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -131,7 +131,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, #endif } -/* used in the protocol hanlder to propagate the napi_id to the socket */ +/* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f45d875905f..053e7c6a6a66 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -20,7 +20,7 @@ struct caif_payload_info; * @assert: expression to evaluate. * * This function will print a error message and a do WARN_ON if the - * assertion failes. Normally this will do a stack up at the current location. + * assertion fails. Normally this will do a stack up at the current location. */ #define caif_assert(assert) \ do { \ @@ -116,7 +116,7 @@ enum caif_direction { * @dn: Pointer down to the layer below. * @node: List node used when layer participate in a list. * @receive: Packet receive function. - * @transmit: Packet transmit funciton. 
+ * @transmit: Packet transmit function. * @ctrlcmd: Used for control signalling upwards in the stack. * @modemcmd: Used for control signaling downwards in the stack. * @id: The identity of this layer diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 44d914a50369..acf664227d96 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -18,7 +18,7 @@ struct cfpkt *cfpkt_create(u16 len); /* * Destroy a CAIF Packet. - * pkt Packet to be destoyed. + * pkt Packet to be destroyed. */ void cfpkt_destroy(struct cfpkt *pkt); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 9707ab54fdd5..4748680e8c88 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -155,8 +155,8 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ SKB_DROP_REASON_SOCKET_RCVBUFF, /** - * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet - * drop out of udp_memory_allocated. + * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as + * udp packet drop out of udp_memory_allocated. */ SKB_DROP_REASON_PROTO_MEM, /** @@ -217,7 +217,7 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_ZEROWINDOW, /** - * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received is already * received before (spurious retrans may happened), see * LINUX_MIB_DELAYEDACKLOST */ diff --git a/include/net/dst.h b/include/net/dst.h index 0aa331bd2fdb..0f303cc60252 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -341,7 +341,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; /* - * Clear hash so that we can recalulate the hash for the + * Clear hash so that we can recalculate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. */ diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index b4a55d2d5e71..1961699598e2 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -102,7 +102,7 @@ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); * @dst_cache: the cache * * No synchronization is enforced: it must be called only when the cache - * is unsed. + * is unused. */ void dst_cache_destroy(struct dst_cache *dst_cache); diff --git a/include/net/erspan.h b/include/net/erspan.h index 6cb4cbd6a48f..c6209e7b6c96 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -89,7 +89,7 @@ enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ - ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */ }; #define ERSPAN_V1_MDSIZE 4 @@ -192,7 +192,7 @@ static inline void erspan_build_header(struct sk_buff *skb, enc_type = ERSPAN_ENCAP_NOVLAN; /* If mirrored packet has vlan tag, extract tci and - * perserve vlan header in the mirrored frame. + * preserve vlan header in the mirrored frame. 
*/ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); diff --git a/include/net/hwbm.h b/include/net/hwbm.h index aa495decec35..bdbe91c609ff 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -11,9 +11,9 @@ struct hwbm_pool { int frag_size; /* Number of buffers currently used by this pool */ int buf_num; - /* constructor called during alocation */ + /* constructor called during allocation */ int (*construct)(struct hwbm_pool *bm_pool, void *buf); - /* protect acces to the buffer counter*/ + /* protect access to the buffer counter*/ struct mutex buf_lock; /* private data */ void *priv; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 1d55ba7c45be..86681f29bda7 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -254,7 +254,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, } /** - * llc_pdu_decode_sa - extracs source address (MAC) of input frame + * llc_pdu_decode_sa - extracts, source address (MAC) of input frame * @skb: input skb that source address must be extracted from it. * @sa: pointer to source address (6 byte array). * diff --git a/include/net/netlink.h b/include/net/netlink.h index e78ce008e07c..db6af207287c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -827,7 +827,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. @@ -849,7 +849,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in liberal mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ @@ -872,7 +872,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in strict mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. 
*/ @@ -887,7 +887,7 @@ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, /** * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct @@ -933,7 +933,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ @@ -1034,7 +1034,7 @@ static inline struct sk_buff *nlmsg_new_large(size_t payload) * @skb: socket buffer the message is stored in * @nlh: netlink message header * - * Corrects the netlink message header to include the appeneded + * Corrects the netlink message header to include the appended * attributes. Only necessary if attributes have been added to * the message. */ @@ -1954,7 +1954,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) * @start: container attribute * * Corrects the container attribute header to include the all - * appeneded attributes. + * appended attributes. * * Returns the total data length of the skb. */ @@ -1987,7 +1987,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * ignored. See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 7eff3d981b89..d25cd7a9c5ff 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -125,14 +125,14 @@ struct netns_sctp { int pf_expose; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes */ int sndbuf_policy; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_rcvbuf * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes */ diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a103f4c8cf75..6633627f6e76 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -121,7 +121,7 @@ struct regulatory_request { * @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to * ensure that passive scan flags and beaconing flags may not be lifted by * cfg80211 due to regulatory beacon hints. 
For more information on beacon - * hints read the documenation for regulatory_hint_found_beacon() + * hints read the documentation for regulatory_hint_found_beacon() * @REGULATORY_COUNTRY_IE_FOLLOW_POWER: for devices that have a preference * that even though they may have programmed their own custom power * setting prior to wiphy registration, they want to ensure their channel diff --git a/include/net/sock.h b/include/net/sock.h index cce23ac4d514..f51d61fab059 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1624,7 +1624,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); * lock_sock_fast - fast version of lock_sock * @sk: socket * - * This version should be used for very small section, where process wont block + * This version should be used for very small section, where process won't block * return false if fast path is taken: * * sk_lock.slock locked, owned = 0, BH disabled @@ -2546,7 +2546,7 @@ struct sock_skb_cb { /* Store sock_skb_cb at the end of skb->cb[] so protocol families * using skb->cb[] would keep using it directly and utilize its - * alignement guarantee. + * alignment guarantee. */ #define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ sizeof(struct sock_skb_cb))) diff --git a/include/net/udp.h b/include/net/udp.h index 5ca53b1cec67..61222545ab1c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -232,7 +232,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an + * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 50655de04c9b..86bb2e8b17c9 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,7 +143,7 @@ enum { INET_DIAG_SHUTDOWN, /* - * Next extenstions cannot be requested in struct inet_diag_req_v2: + * Next extensions cannot be requested in struct inet_diag_req_v2: * its field idiag_ext has only 8 bits. */ -- cgit v1.2.3 From cead0b8991713bdeb5b947758dd62287fcf8da40 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 26 Aug 2024 16:09:43 +0530 Subject: nvme: rename apptag and appmask to lbat and lbatm Rename apptag and appmask to lbat and lbatm so that it matches the field names used in NVMe spec. 
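For illustration, a minimal sketch (hypothetical helper, not taken from the patch) of how code that fills the protection-information words reads after the rename; only the two renamed members of struct nvme_rw_command are assumed, as shown in the hunk below:

	static void example_set_pi_tags(struct nvme_rw_command *rw,
					u16 app_tag, u16 app_mask)
	{
		/* formerly rw->apptag / rw->appmask */
		rw->lbat  = cpu_to_le16(app_tag);   /* logical block application tag */
		rw->lbatm = cpu_to_le16(app_mask);  /* logical block application tag mask */
	}
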
Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Suggested-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7b2ae2e43544..b58d9405d65e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -987,8 +987,8 @@ struct nvme_rw_command { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum { @@ -1057,8 +1057,8 @@ struct nvme_write_zeroes_cmd { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum nvme_zone_mgmt_action { -- cgit v1.2.3 From 1b9c2e94df1408e567f23641f6052af9a75614d1 Mon Sep 17 00:00:00 2001 From: Muhammad Qasim Abdul Majeed Date: Sun, 4 Aug 2024 17:33:13 +0500 Subject: ACPI: bus: Define and use symbols for device and class name lengths It is better to define symbols for the maximum ACPI device name length and the maximum ACPI class name length instead of using raw numbers in typedef statements. Signed-off-by: Muhammad Qasim Abdul Majeed Link: https://patch.msgid.link/20240804123313.16211-6-qasim.majeed20@gmail.com [ rjw: Subject edits, added a changelog, dropped unrelated change ] Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..049cbda28c4c 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -228,10 +228,12 @@ struct acpi_device_dir { /* Plug and Play */ +#define MAX_ACPI_DEVICE_NAME_LEN 40 +#define MAX_ACPI_CLASS_NAME_LEN 20 typedef char acpi_bus_id[8]; typedef u64 acpi_bus_address; -typedef char acpi_device_name[40]; -typedef char acpi_device_class[20]; +typedef char acpi_device_name[MAX_ACPI_DEVICE_NAME_LEN]; +typedef char acpi_device_class[MAX_ACPI_CLASS_NAME_LEN]; struct acpi_hardware_id { struct list_head list; -- cgit v1.2.3 From 50c6dbdfd16e312382842198a7919341ad480e05 Mon Sep 17 00:00:00 2001 From: Max Ramanouski Date: Sun, 25 Aug 2024 01:01:11 +0300 Subject: x86/ioremap: Improve iounmap() address range checks Allowing iounmap() on memory that was not ioremap()'d in the first place is obviously a bad idea. There is currently a feeble attempt to avoid errant iounmap()s by checking to see if the address is below "high_memory". But that's imprecise at best because there are plenty of high addresses that are also invalid to call iounmap() on. Thankfully, there is a more precise helper: is_ioremap_addr(). x86 just does not use it in iounmap(). Restrict iounmap() to addresses in the ioremap region, by using is_ioremap_addr(). This aligns x86 closer to the generic iounmap() implementation. Additionally, add a warning in case there is an attempt to iounmap() invalid memory. This replaces an existing silent return and will help alert folks to any incorrect usage of iounmap(). Due to VMALLOC_START on i386 not being present in asm/pgtable.h, include for asm/vmalloc.h had to be added to include/linux/ioremap.h. 
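The check pattern described above, as a simplified sketch (not the exact arch/x86 implementation; function name and warning text are illustrative only):

	void example_iounmap(volatile void __iomem *addr)
	{
		void *va = (void __force *)addr;

		/* Only addresses inside the ioremap region are valid here. */
		if (WARN(!is_ioremap_addr(va),
			 "iounmap() on invalid address %p\n", va))
			return;

		/* ... tear down the mapping as before ... */
	}
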
[ dhansen: tweak subject and changelog ] Signed-off-by: Max Ramanouski Signed-off-by: Dave Hansen Reviewed-by: Christoph Hellwig Reviewed-by: Alistair Popple Link: https://lore.kernel.org/all/20240824220111.84441-1-max8rr8%40gmail.com --- include/linux/ioremap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h index f0e99fc7dd8b..2bd1661fe9ad 100644 --- a/include/linux/ioremap.h +++ b/include/linux/ioremap.h @@ -4,6 +4,7 @@ #include #include +#include #if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP) /* -- cgit v1.2.3 From 22652022c7eef3c4ad6ab5f13a6dfc7f25f853d4 Mon Sep 17 00:00:00 2001 From: Laurentiu Mihalcea Date: Mon, 26 Aug 2024 14:24:42 -0400 Subject: ASoC: SOF: ipc: replace "enum sof_comp_type" field with "uint32_t" Normally, the type of enums is "unsigned int" or "int". GCC has the "-fshort-enums" option, which instructs the compiler to use the smallest data type that can hold all the values in the enum (i.e: char, short, int or their unsigned variants). According to the GCC documentation, "-fshort-enums" may be default on some targets. This seems to be the case for SOF when built for a certain 32-bit ARM platform. On Linux, this is not the case (tested with "aarch64-linux-gnu-gcc") which means enums such as "enum sof_comp_type" will end up having different sizes on Linux and SOF. Since "enum sof_comp_type" is used in IPC-related structures such as "struct sof_ipc_comp", this means the fields of the structures will end up being placed at different offsets. This, in turn, leads to SOF not being able to properly interpret data passed from Linux. With this in mind, replace "enum sof_comp_type" from "struct sof_ipc_comp" with "uint32_t". Signed-off-by: Laurentiu Mihalcea Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20240826182442.6191-1-laurentiumihalcea111@gmail.com Signed-off-by: Mark Brown --- include/sound/sof/topology.h | 2 +- include/uapi/sound/sof/abi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sof/topology.h b/include/sound/sof/topology.h index 3ba086f61983..449e93c25184 100644 --- a/include/sound/sof/topology.h +++ b/include/sound/sof/topology.h @@ -54,7 +54,7 @@ enum sof_comp_type { struct sof_ipc_comp { struct sof_ipc_cmd_hdr hdr; uint32_t id; - enum sof_comp_type type; + uint32_t type; uint32_t pipeline_id; uint32_t core; diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h index 937ed9408c23..c1b158ec5dab 100644 --- a/include/uapi/sound/sof/abi.h +++ b/include/uapi/sound/sof/abi.h @@ -29,7 +29,7 @@ /* SOF ABI version major, minor and patch numbers */ #define SOF_ABI_MAJOR 3 #define SOF_ABI_MINOR 23 -#define SOF_ABI_PATCH 0 +#define SOF_ABI_PATCH 1 /* SOF ABI version number. Format within 32bit word is MMmmmppp */ #define SOF_ABI_MAJOR_SHIFT 24 -- cgit v1.2.3 From 1bf8012fc6997f2117f6919369cde16659db60e0 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 19 Aug 2024 22:59:45 +0800 Subject: pstore: replace spinlock_t by raw_spinlock_t pstore_dump() is called when both preemption and local IRQ are disabled, and a spinlock is obtained, which is problematic for the RT kernel because in this configuration, spinlocks are sleep locks. Replace the spinlock_t with raw_spinlock_t to avoid sleeping in atomic context. Signed-off-by: Wen Yang Cc: Kees Cook Cc: Tony Luck Cc: Guilherme G. 
Piccoli Cc: linux-hardening@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20240819145945.61274-1-wen.yang@linux.dev Signed-off-by: Kees Cook --- include/linux/pstore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 638507a3c8ff..fed601053c51 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -182,7 +182,7 @@ struct pstore_info { struct module *owner; const char *name; - spinlock_t buf_lock; + raw_spinlock_t buf_lock; char *buf; size_t bufsize; -- cgit v1.2.3 From 7e8ae8486e4471513e2111aba6ac29f2357bed2a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Aug 2024 10:32:34 -0400 Subject: fs/nfsd: fix update of inode attrs in CB_GETATTR Currently, we copy the mtime and ctime to the in-core inode and then mark the inode dirty. This is fine for certain types of filesystems, but not all. Some require a real setattr to properly change these values (e.g. ceph or reexported NFS). Fix this code to call notify_change() instead, which is the proper way to effect a setattr. There is one problem though: In this case, the client is holding a write delegation and has sent us attributes to update our cache. We don't want to break the delegation for this since that would defeat the purpose. Add a new ATTR_DELEG flag that makes notify_change bypass the try_break_deleg call. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..bafc1d134b94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -208,6 +208,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) +#define ATTR_DELEG (1 << 18) /* Delegated attrs. Don't break write delegations */ /* * Whiteout is represented by a char device. The following constants define the -- cgit v1.2.3 From 13acf2b74803a9a9ab3475c306d5814cf3cefbd8 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 14:32:42 +0800 Subject: ata: libata: Remove obsolete function declarations The function ata_schedule_scsi_eh() was removed with commit f8bbfc247efb ("[PATCH] SCSI: make scsi_implement_eh() generic API for SCSI transports"), and the function ata_sff_irq_clear() was removed with commit 37f65b8bc262("libata-sff: ata_sff_irq_clear() is BMDMA specific"). Remove the now useless declarations of these functions in drivers/ata/libata.h and include/linux/libata.h. 
Signed-off-by: Gaosheng Cui Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0279c0a6302f..6552e90753ae 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -2006,7 +2006,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc, extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern void ata_sff_irq_on(struct ata_port *ap); -extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); extern void ata_sff_queue_work(struct work_struct *work); -- cgit v1.2.3 From cda1fba15cb2282b3c364805c9767698f11c3b0e Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 23 Aug 2024 00:25:12 +0200 Subject: dpll: add Embedded SYNC feature for a pin Implement and document new pin attributes for providing Embedded SYNC capabilities to the DPLL subsystem users through a netlink pin-get do/dump messages. Allow the user to set Embedded SYNC frequency with pin-set do netlink message. Reviewed-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20240822222513.255179-2-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 15 +++++++++++++++ include/uapi/linux/dpll.h | 3 +++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index d275736230b3..81f7b623d0ba 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -15,6 +15,7 @@ struct dpll_device; struct dpll_pin; +struct dpll_pin_esync; struct dpll_device_ops { int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, @@ -83,6 +84,13 @@ struct dpll_pin_ops { int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, s64 *ffo, struct netlink_ext_ack *extack); + int (*esync_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 freq, struct netlink_ext_ack *extack); + int (*esync_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + struct dpll_pin_esync *esync, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -111,6 +119,13 @@ struct dpll_pin_phase_adjust_range { s32 max; }; +struct dpll_pin_esync { + u64 freq; + const struct dpll_pin_frequency *range; + u8 range_num; + u8 pulse; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 0c13d7f1a1bc..b0654ade7b7e 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -210,6 +210,9 @@ enum dpll_a_pin { DPLL_A_PIN_PHASE_ADJUST, DPLL_A_PIN_PHASE_OFFSET, DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET, + DPLL_A_PIN_ESYNC_FREQUENCY, + DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED, + DPLL_A_PIN_ESYNC_PULSE, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From 7e8fb2eda9885ea2d13179a4c0bbf810f900ef25 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 Jul 2024 22:16:47 -0700 Subject: jbd2: fix kernel-doc for j_transaction_overhead_buffers Use the correct struct member name in the kernel-doc notation to prevent a kernel-doc build warning. 
include/linux/jbd2.h:1303: warning: Function parameter or struct member 'j_transaction_overhead_buffers' not described in 'journal_s' include/linux/jbd2.h:1303: warning: Excess struct member 'j_transaction_overhead' description in 'journal_s' Fixes: e3a00a23781c ("jbd2: precompute number of transaction descriptor blocks") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20240710182252.4c281445@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240723051647.3053491-1-rdunlap@infradead.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5157d92b6f23..17662eae408f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1086,7 +1086,7 @@ struct journal_s int j_revoke_records_per_block; /** - * @j_transaction_overhead: + * @j_transaction_overhead_buffers: * * Number of blocks each transaction needs for its own bookkeeping */ -- cgit v1.2.3 From fa10db138d205362026daecc8ea65d4e339ade00 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Thu, 1 Aug 2024 09:38:10 +0800 Subject: jbd2: remove unused return value of jbd2_fc_release_bufs Remove unused return value of jbd2_fc_release_bufs. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20240801013815.2393869-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 17662eae408f..8aef9bb6ad57 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); -int jbd2_fc_release_bufs(journal_t *journal); +void jbd2_fc_release_bufs(journal_t *journal); /* * is_journal_abort -- cgit v1.2.3 From 5e1c5c5a687bb3351d9b4a3b0ad457f8497b2a0d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Aug 2024 17:27:23 +0200 Subject: ALSA: pcm: Drop PCM vmalloc buffer helpers As the last-standing user of PCM vmalloc buffer helper API took its own buffer management, we can finally drop those API functions, which were leftover after reorganization of ALSA memalloc code. 
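For context, a hedged sketch of what a driver uses instead of the removed helpers (assuming the standard managed-buffer API and SNDRV_DMA_TYPE_VMALLOC, neither of which is introduced by this patch):

	/* Request a core-managed vmalloc()-backed buffer at PCM creation time
	 * instead of calling the removed snd_pcm_lib_alloc_vmalloc_buffer()
	 * from hw_params.
	 */
	static int example_pcm_new(struct snd_pcm *pcm)
	{
		return snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC,
						      NULL, 0, 0);
	}
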
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240807152725.18948-3-tiwai@suse.de --- include/sound/pcm.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 384032b6c59c..732121b934fd 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1358,48 +1358,6 @@ snd_pcm_set_fixed_buffer_all(struct snd_pcm *pcm, int type, return snd_pcm_set_managed_buffer_all(pcm, type, data, size, 0); } -int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, - size_t size, gfp_t gfp_flags); -int snd_pcm_lib_free_vmalloc_buffer(struct snd_pcm_substream *substream); -struct page *snd_pcm_lib_get_vmalloc_page(struct snd_pcm_substream *substream, - unsigned long offset); -/** - * snd_pcm_lib_alloc_vmalloc_buffer - allocate virtual DMA buffer - * @substream: the substream to allocate the buffer to - * @size: the requested buffer size, in bytes - * - * Allocates the PCM substream buffer using vmalloc(), i.e., the memory is - * contiguous in kernel virtual space, but not in physical memory. Use this - * if the buffer is accessed by kernel code but not by device DMA. - * - * Return: 1 if the buffer was changed, 0 if not changed, or a negative error - * code. - */ -static inline int snd_pcm_lib_alloc_vmalloc_buffer - (struct snd_pcm_substream *substream, size_t size) -{ - return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); -} - -/** - * snd_pcm_lib_alloc_vmalloc_32_buffer - allocate 32-bit-addressable buffer - * @substream: the substream to allocate the buffer to - * @size: the requested buffer size, in bytes - * - * This function works like snd_pcm_lib_alloc_vmalloc_buffer(), but uses - * vmalloc_32(), i.e., the pages are allocated from 32-bit-addressable memory. - * - * Return: 1 if the buffer was changed, 0 if not changed, or a negative error - * code. - */ -static inline int snd_pcm_lib_alloc_vmalloc_32_buffer - (struct snd_pcm_substream *substream, size_t size) -{ - return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, - GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); -} - #define snd_pcm_get_dma_buf(substream) ((substream)->runtime->dma_buffer_p) /** -- cgit v1.2.3 From 9a948c0c8e741776ee6d938b49d34d22034fbb7d Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 14 Aug 2024 11:34:15 +0800 Subject: ceph: Remove unused declarations These functions is never implemented and used. 
Signed-off-by: Yue Haibing Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f66f6aac74f6..d7941478158c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc, extern void ceph_osdc_stop(struct ceph_osd_client *osdc); extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); -extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, - struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); -- cgit v1.2.3 From ea63fb71993c56628f323b8268d36f4bbd836a7f Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 25 Jul 2024 12:09:25 +0300 Subject: wifi: mac80211: refactor block ack management code Introduce 'ieee80211_mgmt_ba()' to avoid code duplication between 'ieee80211_send_addba_resp()', 'ieee80211_send_addba_request()', and 'ieee80211_send_delba()', ensure that all related addresses are '__aligned(2)', and prefer convenient 'ether_addr_copy()' over generic 'memcpy()'. No functional changes expected. Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20240725090925.6022-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0a04eaf5343c..9406f687cffb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2487,7 +2487,7 @@ struct ieee80211_link_sta { * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. */ struct ieee80211_sta { - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); u16 aid; u16 max_rx_aggregation_subframes; bool wme; -- cgit v1.2.3 From e7a7ef9a0742dbd0818d5b15fba2c5313ace765b Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 29 Jul 2024 15:48:16 +0800 Subject: wifi: mac80211: don't use rate mask for offchannel TX either Like the commit ab9177d83c04 ("wifi: mac80211: don't use rate mask for scanning"), ignore incorrect settings to avoid no supported rate warning reported by syzbot. The syzbot did bisect and found cause is commit 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band"), which however corrects bitmask of HE MCS and recognizes correctly settings of empty legacy rate plus HE MCS rate instead of returning -EINVAL. As suggestions [1], follow the change of SCAN TX to consider this case of offchannel TX as well. 
[1] https://lore.kernel.org/linux-wireless/6ab2dc9c3afe753ca6fdcdd1421e7a1f47e87b84.camel@sipsolutions.net/T/#m2ac2a6d2be06a37c9c47a3d8a44b4f647ed4f024 Reported-by: syzbot+8dd98a9e98ee28dc484a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-wireless/000000000000fdef8706191a3f7b@google.com/ Fixes: 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240729074816.20323-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9406f687cffb..de50dc8712c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -994,8 +994,9 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment - * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted - * due to scanning, not in normal operation on the interface. + * @IEEE80211_TX_CTRL_DONT_USE_RATE_MASK: Don't use rate mask for this frame + * which is transmitted due to scanning or offchannel TX, not in normal + * operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. This really is * only relevant for frames that do not have data present, and is @@ -1016,7 +1017,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), - IEEE80211_TX_CTRL_SCAN_TX = BIT(10), + IEEE80211_TX_CTRL_DONT_USE_RATE_MASK = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; -- cgit v1.2.3 From 3a3d1afd25ea2e142c78b67fb769df5be7d308c3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 5 Aug 2024 08:40:37 +0200 Subject: wifi: lib80211: Handle const struct lib80211_crypto_ops in lib80211 lib80211_register_crypto_ops() and lib80211_unregister_crypto_ops() don't modify their "struct lib80211_crypto_ops *ops" argument. So, it can be declared as const. Doing so, some adjustments are needed to also constify some date in "struct lib80211_crypt_data", "struct lib80211_crypto_alg" and the return value of lib80211_get_crypto_ops(). 
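A minimal sketch of what the constified API permits (hypothetical backend; only the register call and the .name member are taken from the header):

	/* The ops table can now live in read-only memory. */
	static const struct lib80211_crypto_ops example_crypto_ops = {
		.name = "EXAMPLE",
		/* .init, .deinit, .encrypt_mpdu, ... filled in as before */
	};

	static int __init example_crypto_init(void)
	{
		return lib80211_register_crypto_ops(&example_crypto_ops);
	}
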
Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/c74085e02f33a11327582b19c9f51c3236e85ae2.1722839425.git.christophe.jaillet@wanadoo.fr Signed-off-by: Johannes Berg --- include/net/lib80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 8b47d3a51cf8..fd0f15d87d80 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -92,7 +92,7 @@ struct lib80211_crypto_ops { struct lib80211_crypt_data { struct list_head list; /* delayed deletion list */ - struct lib80211_crypto_ops *ops; + const struct lib80211_crypto_ops *ops; void *priv; atomic_t refcnt; }; @@ -113,9 +113,9 @@ struct lib80211_crypt_info { int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, spinlock_t *lock); void lib80211_crypt_info_free(struct lib80211_crypt_info *info); -int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); -int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); -struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); +int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops); +int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops); +const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, struct lib80211_crypt_data **crypt); -- cgit v1.2.3 From 7c3b69eadea9e57c28bf914b0fd70f268f3682e1 Mon Sep 17 00:00:00 2001 From: Rory Little Date: Mon, 5 Aug 2024 17:40:23 -0700 Subject: wifi: mac80211: Add non-atomic station iterator Drivers may at times want to iterate their stations with a function which requires some non-atomic operations. ieee80211_iterate_stations_mtx() introduces an API to iterate stations while holding that wiphy's mutex. This allows the iterating function to do non-atomic operations safely. Signed-off-by: Rory Little Link: https://patch.msgid.link/20240806004024.2014080-2-rory@candelatech.com [unify internal list iteration functions] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index de50dc8712c0..cfd64acdc039 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6243,6 +6243,24 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterator)(void *data, struct ieee80211_sta *sta), void *data); + +/** + * ieee80211_iterate_stations_mtx - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. This version can only be used while holding the wiphy + * mutex. 
+ * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + /** * ieee80211_queue_work - add work onto the mac80211 workqueue * -- cgit v1.2.3 From 373d3f8dcbb1c0d764123653ca4574be636b8d86 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sun, 11 Aug 2024 12:47:26 +0800 Subject: wifi: rfkill: Correct parameter type for rfkill_set_hw_state_reason() Change type of parameter @reason to enum rfkill_hard_block_reasons for API rfkill_set_hw_state_reason() according to its comments, and all kernel callers have invoked the API with enum type actually. Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20240811-rfkill_fix-v2-1-9050760336f4@quicinc.com Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 373003ace639..997b34197385 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -147,7 +147,8 @@ void rfkill_destroy(struct rfkill *rfkill); * Prefer to use rfkill_set_hw_state if you don't need any special reason. */ bool rfkill_set_hw_state_reason(struct rfkill *rfkill, - bool blocked, unsigned long reason); + bool blocked, + enum rfkill_hard_block_reasons reason); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. @@ -280,7 +281,7 @@ static inline void rfkill_destroy(struct rfkill *rfkill) static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill, bool blocked, - unsigned long reason) + enum rfkill_hard_block_reasons reason) { return blocked; } -- cgit v1.2.3 From 53bc1b73b67836ac9867f93dee7a443986b4a94f Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 22 Aug 2024 09:42:54 +0800 Subject: wifi: mac80211: export ieee80211_purge_tx_queue() for drivers Drivers need to purge TX SKB when stopping. Using skb_queue_purge() can't report TX status to mac80211, causing ieee80211_free_ack_frame() warns "Have pending ack frames!". Export ieee80211_purge_tx_queue() for drivers to not have to reimplement it. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240822014255.10211-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cfd64acdc039..adfec877f392 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3181,6 +3181,19 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, */ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_purge_tx_queue - purge TX skb queue + * @hw: the hardware + * @skbs: the skbs + * + * Free a set of transmit skbs. Use this function when device is going to stop + * but some transmit skbs without TX status are still queued. + * This function does not take the list lock and the caller must hold the + * relevant locks to use it. 
+ */ +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, + struct sk_buff_head *skbs); + /** * DOC: Hardware crypto acceleration * -- cgit v1.2.3 From 21b91d40575fb64f3f280f6c3af586e32a704a92 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:05:54 +0800 Subject: efi: Remove unused declaration efi_initialize_iomem_resources() Since commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture"), this is not used anymore. Signed-off-by: Yue Haibing Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 6bf3c4fe8511..e28d88066033 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -764,8 +764,6 @@ extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); -extern void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource, struct resource *bss_resource); extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); extern struct kobject *efi_kobj; -- cgit v1.2.3 From 2955ae8186c8a6f029e429f7890e0c7e5f6e215e Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 67733d7a71503fd3e32eeada371f8aa2516c5c95) Signed-off-by: Joonas Lahtinen --- include/drm/intel/i915_pciids.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) 
\ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit v1.2.3 From 2b55d6a42d14c8675e38d6d9adca3014fdf01951 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 20 Aug 2024 17:59:35 +0200 Subject: rcu/kvfree: Add kvfree_rcu_barrier() API Add a kvfree_rcu_barrier() function. It waits until all in-flight pointers are freed over RCU machinery. It does not wait any GP completion and it is within its right to return immediately if there are no outstanding pointers. This function is useful when there is a need to guarantee that a memory is fully freed before destroying memory caches. For example, during unloading a kernel module. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Vlastimil Babka --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d9ac7b136aea..522123050ff8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) kvfree(ptr); } +static inline void kvfree_rcu_barrier(void) +{ + rcu_barrier(); +} + #ifdef CONFIG_KASAN_GENERIC void kvfree_call_rcu(struct rcu_head *head, void *ptr); #else diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 254244202ea9..58e7db80f3a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void) void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); +void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_dyntick_idle(void); -- cgit v1.2.3 From b3c34245756adada8a50bdaedbb3965b071c7b0a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 9 Aug 2024 17:36:55 +0200 Subject: kasan: catch invalid free before SLUB reinitializes the object Currently, when KASAN is combined with init-on-free behavior, the initialization happens before KASAN's "invalid free" checks. More importantly, a subsequent commit will want to RCU-delay the actual SLUB freeing of an object, and we'd like KASAN to still validate synchronously that freeing the object is permitted. (Otherwise this change will make the existing testcase kmem_cache_invalid_free fail.) So add a new KASAN hook that allows KASAN to pre-validate a kmem_cache_free() operation before SLUB actually starts modifying the object or its metadata. 
Inside KASAN, this: - moves checks from poison_slab_object() into check_slab_allocation() - moves kasan_arch_is_ready() up into callers of poison_slab_object() - removes "ip" argument of poison_slab_object() and __kasan_slab_free() (since those functions no longer do any reporting) Acked-by: Vlastimil Babka #slub Reviewed-by: Andrey Konovalov Signed-off-by: Jann Horn Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 70d6a8f6e25d..1570c7191176 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -175,13 +175,55 @@ static __always_inline void * __must_check kasan_init_slab_obj( return (void *)object; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip, bool init); +bool __kasan_slab_pre_free(struct kmem_cache *s, void *object, + unsigned long ip); +/** + * kasan_slab_pre_free - Check whether freeing a slab object is safe. + * @object: Object to be freed. + * + * This function checks whether freeing the given object is safe. It may + * check for double-free and invalid-free bugs and report them. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if freeing the object is unsafe; false otherwise. + */ +static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, + void *object) +{ + if (kasan_enabled()) + return __kasan_slab_pre_free(s, object, _RET_IP_); + return false; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); +/** + * kasan_slab_free - Poison, initialize, and quarantine a slab object. + * @object: Object to be freed. + * @init: Whether to initialize the object. + * + * This function informs that a slab object has been freed and is not + * supposed to be accessed anymore, except for objects in + * SLAB_TYPESAFE_BY_RCU caches. + * + * For KASAN modes that have integrated memory initialization + * (kasan_has_integrated_init() == true), this function also initializes + * the object's memory. For other modes, the @init argument is ignored. + * + * This function might also take ownership of the object to quarantine it. + * When this happens, KASAN will defer freeing the object to a later + * stage and handle it internally until then. The return value indicates + * whether KASAN took ownership of the object. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if KASAN took ownership of the object; false otherwise. + */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { if (kasan_enabled()) - return __kasan_slab_free(s, object, _RET_IP_, init); + return __kasan_slab_free(s, object, init); return false; } @@ -371,6 +413,12 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } + +static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) +{ + return false; +} + static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { return false; -- cgit v1.2.3 From b8c8ba73c68bb3c3e9dad22f488b86c540c839f9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 9 Aug 2024 17:36:56 +0200 Subject: slub: Introduce CONFIG_SLUB_RCU_DEBUG Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU slabs because use-after-free is allowed within the RCU grace period by design. 
Add a SLUB debugging feature which RCU-delays every individual kmem_cache_free() before either actually freeing the object or handing it off to KASAN, and change KASAN to poison freed objects as normal when this option is enabled. For now I've configured Kconfig.debug to default-enable this feature in the KASAN GENERIC and SW_TAGS modes; I'm not enabling it by default in HW_TAGS mode because I'm not sure if it might have unwanted performance degradation effects there. Note that this is mostly useful with KASAN in the quarantine-based GENERIC mode; SLAB_TYPESAFE_BY_RCU slabs are basically always also slabs with a ->ctor, and KASAN's assign_tag() currently has to assign fixed tags for those, reducing the effectiveness of SW_TAGS/HW_TAGS mode. (A possible future extension of this work would be to also let SLUB call the ->ctor() on every allocation instead of only when the slab page is allocated; then tag-based modes would be able to assign new tags on every reallocation.) Tested-by: syzbot+263726e59eab6b442723@syzkaller.appspotmail.com Reviewed-by: Andrey Konovalov Acked-by: Marco Elver Acked-by: Vlastimil Babka #slab Signed-off-by: Jann Horn Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1570c7191176..00a3bf7c0d8f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -196,15 +196,18 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, return false; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); +bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, + bool still_accessible); /** * kasan_slab_free - Poison, initialize, and quarantine a slab object. * @object: Object to be freed. * @init: Whether to initialize the object. + * @still_accessible: Whether the object contents are still accessible. * * This function informs that a slab object has been freed and is not - * supposed to be accessed anymore, except for objects in - * SLAB_TYPESAFE_BY_RCU caches. + * supposed to be accessed anymore, except when @still_accessible is set + * (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU + * grace period might not have passed yet). * * For KASAN modes that have integrated memory initialization * (kasan_has_integrated_init() == true), this function also initializes @@ -220,10 +223,11 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); * @Return true if KASAN took ownership of the object; false otherwise. 
*/ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init) + void *object, bool init, + bool still_accessible) { if (kasan_enabled()) - return __kasan_slab_free(s, object, init); + return __kasan_slab_free(s, object, init, still_accessible); return false; } @@ -419,7 +423,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) return false; } -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + bool init, bool still_accessible) { return false; } -- cgit v1.2.3 From d8ea645d6984c84a87032063a0941f15a323831f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:26 -0700 Subject: RDMA/bnxt_re: Handle variable WQE support for user applications User library calculates the number of slots required for user applications and it can pass that information to the driver. Driver can use this value and update the HW directly. This mechanism is currently used only for the newly introduced variable size WQEs. Extend the bnxt_re_qp_req structure to pass the Send Queue slot count. Reorganize the code to get the sq_slots before initializing the Send Queue attributes. Link: https://patch.msgid.link/r/1724042847-1481-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index e61104f35d73..71140618700a 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -118,10 +118,16 @@ struct bnxt_re_resize_cq_req { __aligned_u64 cq_va; }; +enum bnxt_re_qp_mask { + BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1, +}; + struct bnxt_re_qp_req { __aligned_u64 qpsva; __aligned_u64 qprva; __aligned_u64 qp_handle; + __aligned_u64 comp_mask; + __u32 sq_slots; }; struct bnxt_re_qp_resp { -- cgit v1.2.3 From 10a104c0debbb19a1e45193d5670510216e339ff Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:27 -0700 Subject: RDMA/bnxt_re: Enable variable size WQEs for user space applications Add backward compatibility code to enable variable size WQEs only if the user lib supports it. Link: https://patch.msgid.link/r/1724042847-1481-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 71140618700a..6821002931c8 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -66,6 +66,7 @@ enum bnxt_re_wqe_mode { enum { BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01, + BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT = 0x02, }; struct bnxt_re_uctx_req { -- cgit v1.2.3 From a9b8f337ea4eb66e4980f90bb73e8786e56cacfd Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:14 +0530 Subject: ACPI: scan: Add a weak arch_sort_irqchip_probe() to order the IRQCHIP probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike OF framework, the irqchip probe using IRQCHIP_ACPI_DECLARE has no order defined. Depending on the Makefile is not a good idea. 
So, usually it is worked around by mandating only root interrupt controller probed using IRQCHIP_ACPI_DECLARE and other interrupt controllers are probed via cascade mechanism. However, this is also not a clean solution because if there are multiple root controllers (ex: RINTC in RISC-V which is per CPU) which need to be probed first, then the cascade will happen for every root controller. So, introduce an architecture specific weak function arch_sort_irqchip_probe() to order the probing of the interrupt controllers which can be implemented by different architectures as per their interrupt controller hierarchy. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-3-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..3fff86f95c2f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1343,6 +1343,8 @@ struct acpi_probe_entry { kernel_ulong_t driver_data; }; +void arch_sort_irqchip_probe(struct acpi_probe_entry *ap_head, int nr); + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ -- cgit v1.2.3 From f7d7ccf92f2b9398781f791b4af1a74a9f65b5c3 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:15 +0530 Subject: ACPI: bus: Add acpi_riscv_init() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new function for RISC-V to do architecture specific initialization similar to acpi_arm_init(). Some of the ACPI tables are architecture specific and there is no reason trying to find them on other architectures. So, add acpi_riscv_init() similar to acpi_arm_init(). Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-4-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3fff86f95c2f..892025d873f0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1531,6 +1531,12 @@ void acpi_arm_init(void); static inline void acpi_arm_init(void) { } #endif +#ifdef CONFIG_RISCV +void acpi_riscv_init(void); +#else +static inline void acpi_riscv_init(void) { } +#endif + #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); #else -- cgit v1.2.3 From 76d749c58f4c6dd333cb8ea515373f7c8df96e78 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:16 +0530 Subject: ACPI: scan: Refactor dependency creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures like RISC-V will use implicit dependencies like GSI map to create dependencies between interrupt controller and devices. To support doing that, the function which creates the dependency, is refactored bit and made public so that dependency can be added from outside of scan.c as well. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-5-sunilvl@ventanamicro.com Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8db5bd382915..d6a4dd58e36f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -993,6 +993,7 @@ static inline void acpi_put_acpi_dev(struct acpi_device *adev) int acpi_wait_for_acpi_ipmi(void); +int acpi_scan_add_dep(acpi_handle handle, struct acpi_handle_list *dep_devices); #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } -- cgit v1.2.3 From 8cf252737b418c311d1016b0a7956407a235bb8d Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:18 +0530 Subject: ACPI: scan: Define weak function to populate dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures like RISC-V need to add dependencies without explicit _DEP. Define a weak function which can be implemented by the architecture. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-7-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index d6a4dd58e36f..af72a5d9de99 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -994,6 +994,7 @@ static inline void acpi_put_acpi_dev(struct acpi_device *adev) int acpi_wait_for_acpi_ipmi(void); int acpi_scan_add_dep(acpi_handle handle, struct acpi_handle_list *dep_devices); +u32 arch_acpi_add_auto_dep(acpi_handle handle); #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } -- cgit v1.2.3 From 21734d29f84ad610dbafdca5f4c1bf7ecceeb2fb Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:19 +0530 Subject: ACPI: bus: Add RINTC IRQ model for RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the IRQ model for RISC-V INTC so that acpi_set_irq_model can use this for RISC-V. Signed-off-by: Sunil V L Tested-by: Björn Töpel Link: https://patch.msgid.link/20240812005929.113499-8-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 892025d873f0..3a21f1cf126f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -107,6 +107,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_LPIC, + ACPI_IRQ_MODEL_RINTC, ACPI_IRQ_MODEL_COUNT }; -- cgit v1.2.3 From fbe826b1c10699a70b81a441fe47b817d1019f37 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 12 Aug 2024 06:29:27 +0530 Subject: irqchip/riscv-imsic: Add ACPI support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V IMSIC interrupt controller provides IPI and MSI support. Currently, DT based drivers setup the IPI feature early during boot but defer setting up the MSI functionality. However, in ACPI systems, PCI subsystem is probed early and assume MSI controller is already setup. Hence, both IPI and MSI features need to be initialized early itself. Signed-off-by: Sunil V L Reviewed-by: Anup Patel Tested-by: Björn Töpel Acked-by: Thomas Gleixner Link: https://patch.msgid.link/20240812005929.113499-16-sunilvl@ventanamicro.com Signed-off-by: Rafael J. 
Wysocki --- include/linux/irqchip/riscv-imsic.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h index faf0b800b1b0..7494952c5518 100644 --- a/include/linux/irqchip/riscv-imsic.h +++ b/include/linux/irqchip/riscv-imsic.h @@ -8,6 +8,8 @@ #include #include +#include +#include #include #define IMSIC_MMIO_PAGE_SHIFT 12 @@ -84,4 +86,11 @@ static inline const struct imsic_global_config *imsic_get_global_config(void) #endif +#ifdef CONFIG_ACPI +int imsic_platform_acpi_probe(struct fwnode_handle *fwnode); +struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev); +#else +static inline struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev) { return NULL; } +#endif + #endif -- cgit v1.2.3 From a707f85d47cad3978bef8fe90c7a79e3998e9d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:17 +0200 Subject: HID: bpf: constify parameter rdesc of call_hid_bpf_rdesc_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter is never modified, so mark it as const. Also inline the return statement to avoid a type mismatch error. This is a prerequisite for constification changes in the HID core. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-1-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index d4d063cf63b5..6a47223e6460 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); int hid_bpf_device_init(struct hid_device *hid); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); +u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, -- cgit v1.2.3 From 6737769ca0b6bfc82df53db5fd69068902d608db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:18 +0200 Subject: HID: constify parameter rdesc of hid_parse_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter is never modified, so mark it as const. This is a prerequisite for constification changes in the HID core. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-2-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a6..e7a5d6f2f2eb 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -953,7 +953,7 @@ struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application); -int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +int hid_parse_report(struct hid_device *hid, const __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, -- cgit v1.2.3 From 24ddd0d7de7aaf50f537fd727f31616cb5a65a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:19 +0200 Subject: HID: constify hid_device::rdesc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a report descriptor has been created by the HID core it is not supposed to be modified anymore. Enforce this invariant through the type system. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-3-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index e7a5d6f2f2eb..dda34d8cad19 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -602,7 +602,7 @@ struct hid_ll_driver; struct hid_device { /* device report descriptor */ __u8 *dev_rdesc; unsigned dev_rsize; - __u8 *rdesc; + const __u8 *rdesc; unsigned rsize; struct hid_collection *collection; /* List of HID collections */ unsigned collection_size; /* Number of allocated hid_collections */ -- cgit v1.2.3 From 80cfb508f3fe4c7c6a567fc4aa863c9a38709cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:20 +0200 Subject: HID: constify params and return value of fetch_item() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fetch_item() does not modify the descriptor it operates on. As a prerequisite for the constification of hid_driver::dev_rdesc, mark the parameters and return value of fetch_item() as const. Also adapt the variable types in the callers to match this constification. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-4-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index dda34d8cad19..502bbc6f078c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -46,7 +46,7 @@ struct hid_item { __s16 s16; __u32 u32; __s32 s32; - __u8 *longdata; + const __u8 *longdata; } data; }; -- cgit v1.2.3 From 3593630c89d7d3963c42262694f8aa8b4727a0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:21 +0200 Subject: HID: constify hid_device::dev_rdesc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a report descriptor has been created by the HID core it is not supposed to be modified anymore. Enforce this invariant through the type system. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-5-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 502bbc6f078c..c5fb43db0f2e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -600,7 +600,7 @@ struct hid_driver; struct hid_ll_driver; struct hid_device { /* device report descriptor */ - __u8 *dev_rdesc; + const __u8 *dev_rdesc; unsigned dev_rsize; const __u8 *rdesc; unsigned rsize; -- cgit v1.2.3 From fe73965d078670406acee0218f118c0870d6a58b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 3 Aug 2024 14:34:22 +0200 Subject: HID: change return type of report_fixup() to const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing the drivers to return a "const *" they can constify their static report arrays. This makes it clear to driver authors that the HID core will not modify those reports and they can be reused for multiple devices. Furthermore security is slightly improved as those reports are protected against accidental or malicious modifications. [bentiss: fixup hid-cougar.c and hid-multitouch.c for latest version of the master branch] Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240803-hid-const-fixup-v2-6-f53d7a7b29d8@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index c5fb43db0f2e..3a998fa3812d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -822,7 +822,7 @@ struct hid_driver { struct hid_usage *usage, __s32 value); void (*report)(struct hid_device *hdev, struct hid_report *report); - __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + const __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, unsigned int *size); int (*input_mapping)(struct hid_device *hdev, -- cgit v1.2.3 From 70c261d500951cf3ea0fcf32651aab9a65a91471 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 15:03:23 +0200 Subject: netfilter: nf_tables_ipv6: consider network offset in netdev/egress validation From netdev/egress, skb->len can include the ethernet header, therefore, subtract network offset from skb->len when validating IPv6 packet length. 
Fixes: 42df6e1d221d ("netfilter: Introduce egress hook") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 467d59b9e533..a0633eeaec97 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -31,8 +31,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; + u32 pkt_len, skb_len; int protohdr; - u32 pkt_len; ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*ip6h), &_ip6h); @@ -43,7 +43,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > pkt->skb->len) + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + if (pkt_len + sizeof(*ip6h) > skb_len) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); -- cgit v1.2.3 From ba7e053ec89f61be9d27bfb244de52849b5138aa Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 16 Aug 2024 10:19:09 +0200 Subject: power: supply: max77693: Expose input current limit and CC current properties There are two charger current limit registers: - Fast charge current limit (which controls current going from the charger to the battery); - CHGIN input current limit (which controls current going into the charger through the cable). Add the necessary functions to retrieve the CHGIN input limit (from CHARGER regulator) and maximum fast charge current values, and expose them as power supply properties. Tested-by: Henrik Grimler Signed-off-by: Artur Weber Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240816-max77693-charger-extcon-v4-3-050a0a9bfea0@gmail.com Signed-off-by: Sebastian Reichel --- include/linux/mfd/max77693-private.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 54444ff2a5de..20c5e02ed9da 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -217,6 +217,10 @@ enum max77693_charger_battery_state { #define CHG_CNFG_01_CHGRSTRT_MASK (0x3 << CHG_CNFG_01_CHGRSTRT_SHIFT) #define CHG_CNFG_01_PQEN_MAKS BIT(CHG_CNFG_01_PQEN_SHIFT) +/* MAX77693_CHG_REG_CHG_CNFG_02 register */ +#define CHG_CNFG_02_CC_SHIFT 0 +#define CHG_CNFG_02_CC_MASK 0x3F + /* MAX77693_CHG_REG_CHG_CNFG_03 register */ #define CHG_CNFG_03_TOITH_SHIFT 0 #define CHG_CNFG_03_TOTIME_SHIFT 3 @@ -244,6 +248,7 @@ enum max77693_charger_battery_state { #define CHG_CNFG_12_VCHGINREG_MASK (0x3 << CHG_CNFG_12_VCHGINREG_SHIFT) /* MAX77693 CHG_CNFG_09 Register */ +#define CHG_CNFG_09_CHGIN_ILIM_SHIFT 0 #define CHG_CNFG_09_CHGIN_ILIM_MASK 0x7F /* MAX77693 CHG_CTRL Register */ -- cgit v1.2.3 From 2aeeef906d5a526dc60cf4af92eda69836c39b1f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 23 Aug 2024 06:10:56 +0300 Subject: bonding: change ipsec_lock from spin lock to mutex In the cited commit, bond->ipsec_lock is added to protect ipsec_list, hence xdo_dev_state_add and xdo_dev_state_delete are called inside this lock. As ipsec_lock is a spin lock and such xfrmdev ops may sleep, "scheduling while atomic" will be triggered when changing bond's active slave. 
[ 101.055189] BUG: scheduling while atomic: bash/902/0x00000200 [ 101.055726] Modules linked in: [ 101.058211] CPU: 3 PID: 902 Comm: bash Not tainted 6.9.0-rc4+ #1 [ 101.058760] Hardware name: [ 101.059434] Call Trace: [ 101.059436] [ 101.060873] dump_stack_lvl+0x51/0x60 [ 101.061275] __schedule_bug+0x4e/0x60 [ 101.061682] __schedule+0x612/0x7c0 [ 101.062078] ? __mod_timer+0x25c/0x370 [ 101.062486] schedule+0x25/0xd0 [ 101.062845] schedule_timeout+0x77/0xf0 [ 101.063265] ? asm_common_interrupt+0x22/0x40 [ 101.063724] ? __bpf_trace_itimer_state+0x10/0x10 [ 101.064215] __wait_for_common+0x87/0x190 [ 101.064648] ? usleep_range_state+0x90/0x90 [ 101.065091] cmd_exec+0x437/0xb20 [mlx5_core] [ 101.065569] mlx5_cmd_do+0x1e/0x40 [mlx5_core] [ 101.066051] mlx5_cmd_exec+0x18/0x30 [mlx5_core] [ 101.066552] mlx5_crypto_create_dek_key+0xea/0x120 [mlx5_core] [ 101.067163] ? bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.067738] ? kmalloc_trace+0x4d/0x350 [ 101.068156] mlx5_ipsec_create_sa_ctx+0x33/0x100 [mlx5_core] [ 101.068747] mlx5e_xfrm_add_state+0x47b/0xaa0 [mlx5_core] [ 101.069312] bond_change_active_slave+0x392/0x900 [bonding] [ 101.069868] bond_option_active_slave_set+0x1c2/0x240 [bonding] [ 101.070454] __bond_opt_set+0xa6/0x430 [bonding] [ 101.070935] __bond_opt_set_notify+0x2f/0x90 [bonding] [ 101.071453] bond_opt_tryset_rtnl+0x72/0xb0 [bonding] [ 101.071965] bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.072567] kernfs_fop_write_iter+0x10c/0x1a0 [ 101.073033] vfs_write+0x2d8/0x400 [ 101.073416] ? alloc_fd+0x48/0x180 [ 101.073798] ksys_write+0x5f/0xe0 [ 101.074175] do_syscall_64+0x52/0x110 [ 101.074576] entry_SYSCALL_64_after_hwframe+0x4b/0x53 As bond_ipsec_add_sa_all and bond_ipsec_del_sa_all are only called from bond_change_active_slave, which requires holding the RTNL lock. And bond_ipsec_add_sa and bond_ipsec_del_sa are xfrm state xdo_dev_state_add and xdo_dev_state_delete APIs, which are in user context. So ipsec_lock doesn't have to be spin lock, change it to mutex, and thus the above issue can be resolved. Fixes: 9a5605505d9c ("bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20240823031056.110999-4-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index b61fb1aa3a56..8bb5f016969f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -260,7 +260,7 @@ struct bonding { #ifdef CONFIG_XFRM_OFFLOAD struct list_head ipsec_list; /* protecting ipsec_list */ - spinlock_t ipsec_lock; + struct mutex ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ struct bpf_prog *xdp_prog; }; -- cgit v1.2.3 From 3410d0e14f9a6856386c6a8eb2310cbc58191777 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 26 Aug 2024 09:07:41 -0700 Subject: net: mana: Implement get_ringparam/set_ringparam for mana Currently the values of WQs for RX and TX queues for MANA devices are hardcoded to default sizes. Allow configuring these values for MANA devices as ringparam configuration(get/set) through ethtool_ops. Pre-allocate buffers at the beginning of this operation, to prevent complete network loss in low-memory conditions. 
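A rough sketch of the ethtool_ops side this enables; the callback name is hypothetical and the body is simplified (it assumes the port context is the netdev private data), while the limit macros and the rx_queue_size/tx_queue_size fields are the ones added in the hunk below. Userspace can then query and resize the rings with the usual ethtool -g / ethtool -G commands.

static void example_get_ringparam(struct net_device *ndev,
				  struct ethtool_ringparam *ring,
				  struct kernel_ethtool_ringparam *kring,
				  struct netlink_ext_ack *extack)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE;
	ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE;
	ring->rx_pending = apc->rx_queue_size;
	ring->tx_pending = apc->tx_queue_size;
}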
Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Saurabh Sengar Link: https://patch.msgid.link/1724688461-12203-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/mana.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 7caa334f4888..1f869624811d 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -38,9 +38,21 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 -#define RX_BUFFERS_PER_QUEUE 512 +/* This Max value for RX buffers is derived from __alloc_page()'s max page + * allocation calculation. It allows maximum 2^(MAX_ORDER -1) pages. RX buffer + * size beyond this value gets rejected by __alloc_page() call. + */ +#define MAX_RX_BUFFERS_PER_QUEUE 8192 +#define DEF_RX_BUFFERS_PER_QUEUE 512 +#define MIN_RX_BUFFERS_PER_QUEUE 128 -#define MAX_SEND_BUFFERS_PER_QUEUE 256 +/* This max value for TX buffers is derived as the maximum allocatable + * pages supported on host per guest through testing. TX buffer size beyond + * this value is rejected by the hardware. + */ +#define MAX_TX_BUFFERS_PER_QUEUE 16384 +#define DEF_TX_BUFFERS_PER_QUEUE 256 +#define MIN_TX_BUFFERS_PER_QUEUE 128 #define EQ_SIZE (8 * MANA_PAGE_SIZE) @@ -286,7 +298,7 @@ struct mana_recv_buf_oob { void *buf_va; bool from_pool; /* allocated from a page pool */ - /* SGL of the buffer going to be sent has part of the work request. */ + /* SGL of the buffer going to be sent as part of the work request. */ u32 num_sge; struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; @@ -438,6 +450,9 @@ struct mana_port_context { unsigned int max_queues; unsigned int num_queues; + unsigned int rx_queue_size; + unsigned int tx_queue_size; + mana_handle_t port_handle; mana_handle_t pf_filter_handle; @@ -473,6 +488,8 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu); +void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; -- cgit v1.2.3 From a96c5515d0d15df103598b2bc57245d66143b5dd Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 26 Aug 2024 21:43:08 +0000 Subject: net: dsa: microchip: Add KSZ8895/KSZ8864 switch support KSZ8895/KSZ8864 is a switch family between KSZ8863/73 and KSZ8795, so it shares some registers and functions in those switches already implemented in the KSZ DSA driver. 
Signed-off-by: Tristram Ha Tested-by: Pieter Van Trappen Reviewed-by: Oleksij Rempel Signed-off-by: Jakub Kicinski --- include/linux/platform_data/microchip-ksz.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index 8c659db4da6b..d074019474f5 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -28,6 +28,8 @@ enum ksz_chip_id { KSZ8794_CHIP_ID = 0x8794, KSZ8765_CHIP_ID = 0x8765, KSZ8830_CHIP_ID = 0x8830, + KSZ8864_CHIP_ID = 0x8864, + KSZ8895_CHIP_ID = 0x8895, KSZ9477_CHIP_ID = 0x00947700, KSZ9896_CHIP_ID = 0x00989600, KSZ9897_CHIP_ID = 0x00989700, -- cgit v1.2.3 From 17163f23678c7599e40758d7b96f68e3f3f2ea15 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Aug 2024 15:38:23 +0200 Subject: xfrm: minor update to sdb and xfrm_policy comments The spd is no longer maintained as a linear list. We also haven't been caching bundles in the xfrm_policy struct since 2010. While at it, add kdoc style comments for the xfrm_policy structure and extend the description of the current rbtree based search to mention why it needs to search the candidate set. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1fa2da22a49e..b6bfdc6416c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -67,13 +67,15 @@ - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl - SPD is plain linear list of xfrm_policy rules, ordered by priority. + SPD is organized as hash table (for policies that meet minimum address prefix + length setting, net->xfrm.policy_hthresh). Other policies are stored in + lists, sorted into rbtree ordered by destination and source address networks. + See net/xfrm/xfrm_policy.c for details. + (To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.) - Lookup is plain linear search until the first match with selector. - If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, @@ -86,8 +88,6 @@ |---. child .-> dst -. xfrm .-> xfrm_state #3 |---. child .-> NULL - Bundles are cached at xrfm_policy struct (field ->bundles). 
- Resolution of xrfm_tmpl ----------------------- @@ -526,6 +526,36 @@ struct xfrm_policy_queue { unsigned long timeout; }; +/** + * struct xfrm_policy - xfrm policy + * @xp_net: network namespace the policy lives in + * @bydst: hlist node for SPD hash table or rbtree list + * @byidx: hlist node for index hash table + * @lock: serialize changes to policy structure members + * @refcnt: reference count, freed once it reaches 0 + * @pos: kernel internal tie-breaker to determine age of policy + * @timer: timer + * @genid: generation, used to invalidate old policies + * @priority: priority, set by userspace + * @index: policy index (autogenerated) + * @if_id: virtual xfrm interface id + * @mark: packet mark + * @selector: selector + * @lft: liftime configuration data + * @curlft: liftime state + * @walk: list head on pernet policy list + * @polq: queue to hold packets while aqcuire operaion in progress + * @bydst_reinsert: policy tree node needs to be merged + * @type: XFRM_POLICY_TYPE_MAIN or _SUB + * @action: XFRM_POLICY_ALLOW or _BLOCK + * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP + * @xfrm_nr: number of used templates in @xfrm_vec + * @family: protocol family + * @security: SELinux security label + * @xfrm_vec: array of templates to resolve state + * @rcu: rcu head, used to defer memory release + * @xdo: hardware offload state + */ struct xfrm_policy { possible_net_t xp_net; struct hlist_node bydst; -- cgit v1.2.3 From a547a5880cba6f287179135381f1b484b251be31 Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Thu, 22 Aug 2024 12:02:11 -0700 Subject: x86/resctrl: Fix arch_mbm_* array overrun on SNC When using resctrl on systems with Sub-NUMA Clustering enabled, monitoring groups may be allocated RMID values which would overrun the arch_mbm_{local,total} arrays. This is due to inconsistencies in whether the SNC-adjusted num_rmid value or the unadjusted value in resctrl_arch_system_num_rmid_idx() is used. The num_rmid value for the L3 resource is currently: resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache As a simple fix, make resctrl_arch_system_num_rmid_idx() return the SNC-adjusted, L3 num_rmid value on x86. Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240822190212.1848788-1-peternewman@google.com --- include/linux/resctrl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b0875b99e811..d94abba1c716 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -248,6 +248,7 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); +u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); /* -- cgit v1.2.3 From 1934b212615dc617ac84fc306333ab2b9fc3b04f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Aug 2024 18:00:01 +0200 Subject: file: reclaim 24 bytes from f_owner We do embedd struct fown_struct into struct file letting it take up 32 bytes in total. We could tweak struct fown_struct to be more compact but really it shouldn't even be embedded in struct file in the first place. Instead, actual users of struct fown_struct should allocate the struct on demand. This frees up 24 bytes in struct file. That will have some potentially user-visible changes for the ownership fcntl()s. 
Some of them can now fail due to allocation failures. Practically, that probably will almost never happen as the allocations are small and they only happen once per file. The fown_struct is used during kill_fasync() which is used by e.g., pipes to generate a SIGIO signal. Sending of such signals is conditional on userspace having set an owner for the file using one of the F_OWNER fcntl()s. Such users will be unaffected if struct fown_struct is allocated during the fcntl() call. There are a few subsystems that call __f_setown() expecting file->f_owner to be allocated: (1) tun devices file->f_op->fasync::tun_chr_fasync() -> __f_setown() There are no callers of tun_chr_fasync(). (2) tty devices file->f_op->fasync::tty_fasync() -> __tty_fasync() -> __f_setown() tty_fasync() has no additional callers but __tty_fasync() has. Note that __tty_fasync() only calls __f_setown() if the @on argument is true. It's called from: file->f_op->release::tty_release() -> tty_release() -> __tty_fasync() -> __f_setown() tty_release() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of tty_release() are safe as well. file->f_op->release::tty_open() -> tty_release() -> __tty_fasync() -> __f_setown() __tty_hangup() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of __tty_hangup() are safe as well. From the callchains it's obvious that (1) and (2) end up getting called via file->f_op->fasync(). That can happen either through the F_SETFL fcntl() with the FASYNC flag raised or via the FIOASYNC ioctl(). If FASYNC is requested and the file isn't already FASYNC then file->f_op->fasync() is called with @on true which ends up causing both (1) and (2) to call __f_setown(). (1) and (2) are the only subsystems that call __f_setown() from the file->f_op->fasync() handler. So both (1) and (2) have been updated to allocate a struct fown_struct prior to calling fasync_helper() to register with the fasync infrastructure. That's safe as they both call fasync_helper() which also does allocations if @on is true. The other interesting case are file leases: (3) file leases lease_manager_ops->lm_setup::lease_setup() -> __f_setown() Which in turn is called from: generic_add_lease() -> lease_manager_ops->lm_setup::lease_setup() -> __f_setown() So here again we can simply make generic_add_lease() allocate struct fown_struct prior to the lease_manager_ops->lm_setup::lease_setup() which happens under a spinlock. With that the two remaining subsystems that call __f_setown() are: (4) dnotify (5) sockets Both have their own custom ioctls to set struct fown_struct and both have been converted to allocate a struct fown_struct on demand from their respective ioctls. Interactions with O_PATH are fine as well e.g., when opening a /dev/tty as O_PATH then no file->f_op->open() happens thus no file->f_owner is allocated. That's fine as no file operation will be set for those and the device has never been opened. fcntl()s called on such things will just allocate a ->f_owner on demand. Although I have zero idea why'd you care about f_owner on an O_PATH fd. 
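The resulting pattern for such subsystems looks roughly like the sketch below; the fasync handler and list are hypothetical, while file_f_owner_allocate() and fasync_helper() are the real calls:

static struct fasync_struct *example_fasync_list;

static int example_fasync(int fd, struct file *filp, int on)
{
	int ret;

	if (on) {
		/* Make sure filp->f_owner exists before signal-driven I/O
		 * can be requested; this may now fail with -ENOMEM.
		 */
		ret = file_f_owner_allocate(filp);
		if (ret)
			return ret;
	}

	return fasync_helper(fd, filp, on, &example_fasync_list);
}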
Link: https://lore.kernel.org/r/20240813-work-f_owner-v2-1-4e9343a79f9f@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb0426f349fc..7af239ca87e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -947,6 +947,7 @@ static inline unsigned imajor(const struct inode *inode) } struct fown_struct { + struct file *file; /* backpointer for security modules */ rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ @@ -1011,7 +1012,7 @@ struct file { struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; - struct fown_struct f_owner; + struct fown_struct *f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; @@ -1076,6 +1077,12 @@ struct file_lease; #define OFFT_OFFSET_MAX type_max(off_t) #endif +int file_f_owner_allocate(struct file *file); +static inline struct fown_struct *file_f_owner(const struct file *file) +{ + return READ_ONCE(file->f_owner); +} + extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) @@ -1124,7 +1131,7 @@ extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); +extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where -- cgit v1.2.3 From a55d1cbd1720679cfe9837bce250e397ec513989 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 16:14:46 +0200 Subject: fs: switch f_iocb_flags and f_ra Now that we shrank struct file by 24 bytes we still have a 4 byte hole. If we move struct file_ra_state into the union and f_iocb_flags out of the union we close that whole and bring down struct file to 192 bytes. Which means struct file is 3 cachelines and we managed to shrink it by 40 bytes this cycle. I've tried to audit all codepaths that use f_ra and none of them seem to rely on it in file->f_op->release() and never have since commit 1da177e4c3f4 ("Linux-2.6.12-rc2"). Link: https://lore.kernel.org/r/20240823-luftdicht-berappen-d69a2166a0db@brauner Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7af239ca87e2..095a956aeb29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -999,9 +999,9 @@ struct file { struct callback_head f_task_work; /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; - unsigned int f_iocb_flags; + /* Invalid after last fput(). */ + struct file_ra_state f_ra; }; - /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. 
@@ -1012,9 +1012,9 @@ struct file { struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; + unsigned int f_iocb_flags; struct fown_struct *f_owner; const struct cred *f_cred; - struct file_ra_state f_ra; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; -- cgit v1.2.3 From 65dc80a78c5f42f5648396b218f50374cf1c2770 Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Tue, 27 Aug 2024 20:32:06 +0800 Subject: ASoC: SOF: Intel: hda: support BT link mask in mach_params Add an new variable bt_link_mask to snd_soc_acpi_mach_params structure. SSP port mask of BT offload found in NHLT table will be sent to machine driver to setup BE dai link with correct SSP port number. This patch only detects and enables the BT dailink. The functionality will only be unlocked with a topology file that makes a reference to that BT dailink. For backwards-compatibility reasons, this topology will not be used by default. Chromebooks and Linux users willing to experiment shall use the tplg_name kernel parameter to force the use of an enhanced topology. Signed-off-by: Brent Lu Reviewed-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://patch.msgid.link/20240827123215.258859-9-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index b6d301946244..c1552bc6e31c 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -73,6 +73,7 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @subsystem_rev: optional PCI SSID revision value * @subsystem_id_set: true if a value has been written to * subsystem_vendor and subsystem_device. + * @bt_link_mask: BT offload link enabled on the board */ struct snd_soc_acpi_mach_params { u32 acpi_ipc_irq_index; @@ -89,6 +90,7 @@ struct snd_soc_acpi_mach_params { unsigned short subsystem_device; unsigned short subsystem_rev; bool subsystem_id_set; + u32 bt_link_mask; }; /** -- cgit v1.2.3 From 290f31e943a29c93532b1684652b04fd60d0f696 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 26 Aug 2024 23:41:57 +0000 Subject: ASoC: soc-pcm: makes snd_soc_dpcm_can_be_xxx() local function No driver is calling snd_soc_dpcm_can_be_xxx() functions. We don't need to have EXPORT_SYMBOL_GPL() for them. Let's makes it static function. One note is that snd_soc_dpcm_fe_can_update() is not used in upstream. Use #if-endif and keep it for future support. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87h6b6df7e.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dpcm.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 773f2db8c31c..c6fb350b4b06 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -113,24 +113,6 @@ struct snd_soc_dpcm_runtime { #define for_each_dpcm_be_rollback(fe, stream, _dpcm) \ list_for_each_entry_continue_reverse(_dpcm, &(fe)->dpcm[stream].be_clients, list_be) -/* can this BE stop and free */ -int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* can this BE perform a hw_params() */ -int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* can this BE perform prepare */ -int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); - -/* is the current PCM operation for this FE ? */ -int snd_soc_dpcm_fe_can_update(struct snd_soc_pcm_runtime *fe, int stream); - -/* is the current PCM operation for this BE ? */ -int snd_soc_dpcm_be_can_update(struct snd_soc_pcm_runtime *fe, - struct snd_soc_pcm_runtime *be, int stream); /* get the substream for this BE */ struct snd_pcm_substream * -- cgit v1.2.3 From 1da29f2c39b67b846b74205c81bf0ccd96d34727 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Aug 2024 23:06:49 +0100 Subject: netfs, cifs: Fix handling of short DIO read Short DIO reads, particularly in relation to cifs, are not being handled correctly by cifs and netfslib. This can be tested by doing a DIO read of a file where the size of read is larger than the size of the file. When it crosses the EOF, it gets a short read and this gets retried, and in the case of cifs, the retry read fails, with the failure being translated to ENODATA. Fix this by the following means: (1) Add a flag, NETFS_SREQ_HIT_EOF, for the filesystem to set when it detects that the read did hit the EOF. (2) Make the netfslib read assessment stop processing subrequests when it encounters one with that flag set. (3) Return rreq->transferred, the accumulated contiguous amount read to that point, to userspace for a DIO read. (4) Make cifs set the flag and clear the error if the read RPC returned ENODATA. (5) Make cifs set the flag and clear the error if a short read occurred without error and the read-to file position is now at the remote inode size. 
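Condensed, item (4) amounts to something like the following in a filesystem's read completion path; the handler shape is illustrative, while NETFS_SREQ_HIT_EOF and netfs_subreq_terminated() are real:

static void example_read_done(struct netfs_io_subrequest *subreq, ssize_t rc)
{
	if (rc == -ENODATA) {
		/* Nothing exists past the EOF: stop retrying and let
		 * netfslib return what has been read so far.
		 */
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
		rc = 0;
	}
	netfs_subreq_terminated(subreq, rc, false);
}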
Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 983816608f15..c47443e7a97e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -198,6 +198,7 @@ struct netfs_io_subrequest { #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ +#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { -- cgit v1.2.3 From 789e51597d33ec0053b029127d797d86c0d857eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:35 +0200 Subject: Revert "drm/ttm: Add a flag to allow drivers to skip clear-on-free" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove TTM_TT_FLAG_CLEARED_ON_FREE now that XE stopped using this flag. This reverts commit decbfaf06db05fa1f9b33149ebb3c145b44e878f. Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Nirmoy Das Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- include/drm/ttm/ttm_tt.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index cfaf49de2419..2b9d856ff388 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,9 +85,6 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * - * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles - * clearing backing store - * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -97,9 +94,8 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) -#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.2.3 From 947697c6f0f75f9866f2f891a102dece7a09a064 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 Aug 2024 10:18:52 +0530 Subject: uapi: Define GENMASK_U128 This adds GENMASK_U128() and __GENMASK_U128() macros using __BITS_PER_U128 and __int128 data types. These macros will be used in providing support for generating 128 bit masks. The macros wouldn't work in all assembler flavors for reasons described in the comments on top of declarations. Enforce it for more by adding !__ASSEMBLY__ guard. 
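A few illustrative values, which follow directly from the definitions added below (usable from C only, per the !__ASSEMBLY__ guard):

unsigned __int128 all  = GENMASK_U128(127, 0);	/* all 128 bits set */
unsigned __int128 high = GENMASK_U128(127, 64);	/* upper 64 bits set */
unsigned __int128 low  = GENMASK_U128(63, 0);	/* same bits as GENMASK_ULL(63, 0) */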
Cc: Yury Norov Cc: Rasmus Villemoes Cc: Arnd Bergmann > Cc: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org Reviewed-by: Arnd Bergmann Signed-off-by: Anshuman Khandual Signed-off-by: Yury Norov --- include/linux/bits.h | 15 +++++++++++++++ include/uapi/linux/bits.h | 3 +++ include/uapi/linux/const.h | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. + */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) -- cgit v1.2.3 From f951171044762a0d179caf9f1addcdbc7cb533bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Aug 2024 08:50:46 +0200 Subject: ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE FALLOC_FL_NO_HIDE_STALE can't make it past vfs_fallocate (and if the flag does what the name implies that's a good thing as it would be highly dangerous). Remove the dead tracing code for it. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240827065123.1762168-3-hch@lst.de Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cc5e9b7b2b44..156908641e68 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ - { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) -- cgit v1.2.3 From 57413d8e172c10c90fbd91f98d0f7d8eb27e824c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Aug 2024 08:50:47 +0200 Subject: fs: sort out the fallocate mode vs flag mess The fallocate system call takes a mode argument, but that argument contains a wild mix of exclusive modes and an optional flags. Replace FALLOC_FL_SUPPORTED_MASK with FALLOC_FL_MODE_MASK, which excludes the optional flag bit, so that we can use switch statement on the value to easily enumerate the cases while getting the check for duplicate modes for free. To make this (and in the future the file system implementations) more readable also add a symbolic name for the 0 mode used to allocate blocks. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240827065123.1762168-4-hch@lst.de Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/falloc.h | 18 ++++++++++++------ include/uapi/linux/falloc.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index f3f0b97b1675..3f49f3df6af5 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -25,12 +25,18 @@ struct space_resv { #define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) #define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) -#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ - FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | \ - FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | \ - FALLOC_FL_UNSHARE_RANGE) +/* + * Mask of all supported fallocate modes. Only one can be set at a time. + * + * In addition to the mode bit, the mode argument can also encode flags. + * FALLOC_FL_KEEP_SIZE is the only supported flag so far. 
+ */ +#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \ + FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | \ + FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) /* on ia32 l_start is on a 32-bit boundary */ #if defined(CONFIG_X86_64) diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h index 51398fa57f6c..5810371ed72b 100644 --- a/include/uapi/linux/falloc.h +++ b/include/uapi/linux/falloc.h @@ -2,6 +2,7 @@ #ifndef _UAPI_FALLOC_H_ #define _UAPI_FALLOC_H_ +#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */ #define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ #define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ #define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */ -- cgit v1.2.3 From 654beb75ca95240c00c78be0bbc2ea66a125a997 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 21 Aug 2024 18:46:33 +0200 Subject: dma: ipu: Remove include/linux/dma/ipu-dma.h When this file was renamed in commit b8a6d9980f75 ("dma: ipu: rename mach/ipu.h to include/linux/dma/ipu-dma.h"), 4 .c files have been modified accordingly: drivers/dma/ipu/ipu_idmac.c drivers/dma/ipu/ipu_irq.c --> removed in commit f1de55ff7c70 ("dmaengine: ipu: Remove the driver") in 2023-08 drivers/media/platform/soc_camera/mx3_camera.c --> removed in commit c93cc61475eb ("[media] staging/media: remove deprecated mx3 driver") in 2016-06 drivers/video/mx3fb.c --> removed in commit bfac19e239a7 ("fbdev: mx3fb: Remove the driver") in 2023-08 Now include/linux/dma/ipu-dma.h is unused and can be removed as-well. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/532e7e2816ccf226f3ab1fa76ec7873bc09299d0.1724258714.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- include/linux/dma/ipu-dma.h | 174 -------------------------------------------- 1 file changed, 174 deletions(-) delete mode 100644 include/linux/dma/ipu-dma.h (limited to 'include') diff --git a/include/linux/dma/ipu-dma.h b/include/linux/dma/ipu-dma.h deleted file mode 100644 index 6969391580d2..000000000000 --- a/include/linux/dma/ipu-dma.h +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. - */ - -#ifndef __LINUX_DMA_IPU_DMA_H -#define __LINUX_DMA_IPU_DMA_H - -#include -#include - -/* IPU DMA Controller channel definitions. */ -enum ipu_channel { - IDMAC_IC_0 = 0, /* IC (encoding task) to memory */ - IDMAC_IC_1 = 1, /* IC (viewfinder task) to memory */ - IDMAC_ADC_0 = 1, - IDMAC_IC_2 = 2, - IDMAC_ADC_1 = 2, - IDMAC_IC_3 = 3, - IDMAC_IC_4 = 4, - IDMAC_IC_5 = 5, - IDMAC_IC_6 = 6, - IDMAC_IC_7 = 7, /* IC (sensor data) to memory */ - IDMAC_IC_8 = 8, - IDMAC_IC_9 = 9, - IDMAC_IC_10 = 10, - IDMAC_IC_11 = 11, - IDMAC_IC_12 = 12, - IDMAC_IC_13 = 13, - IDMAC_SDC_0 = 14, /* Background synchronous display data */ - IDMAC_SDC_1 = 15, /* Foreground data (overlay) */ - IDMAC_SDC_2 = 16, - IDMAC_SDC_3 = 17, - IDMAC_ADC_2 = 18, - IDMAC_ADC_3 = 19, - IDMAC_ADC_4 = 20, - IDMAC_ADC_5 = 21, - IDMAC_ADC_6 = 22, - IDMAC_ADC_7 = 23, - IDMAC_PF_0 = 24, - IDMAC_PF_1 = 25, - IDMAC_PF_2 = 26, - IDMAC_PF_3 = 27, - IDMAC_PF_4 = 28, - IDMAC_PF_5 = 29, - IDMAC_PF_6 = 30, - IDMAC_PF_7 = 31, -}; - -/* Order significant! 
*/ -enum ipu_channel_status { - IPU_CHANNEL_FREE, - IPU_CHANNEL_INITIALIZED, - IPU_CHANNEL_READY, - IPU_CHANNEL_ENABLED, -}; - -#define IPU_CHANNELS_NUM 32 - -enum pixel_fmt { - /* 1 byte */ - IPU_PIX_FMT_GENERIC, - IPU_PIX_FMT_RGB332, - IPU_PIX_FMT_YUV420P, - IPU_PIX_FMT_YUV422P, - IPU_PIX_FMT_YUV420P2, - IPU_PIX_FMT_YVU422P, - /* 2 bytes */ - IPU_PIX_FMT_RGB565, - IPU_PIX_FMT_RGB666, - IPU_PIX_FMT_BGR666, - IPU_PIX_FMT_YUYV, - IPU_PIX_FMT_UYVY, - /* 3 bytes */ - IPU_PIX_FMT_RGB24, - IPU_PIX_FMT_BGR24, - /* 4 bytes */ - IPU_PIX_FMT_GENERIC_32, - IPU_PIX_FMT_RGB32, - IPU_PIX_FMT_BGR32, - IPU_PIX_FMT_ABGR32, - IPU_PIX_FMT_BGRA32, - IPU_PIX_FMT_RGBA32, -}; - -enum ipu_color_space { - IPU_COLORSPACE_RGB, - IPU_COLORSPACE_YCBCR, - IPU_COLORSPACE_YUV -}; - -/* - * Enumeration of IPU rotation modes - */ -enum ipu_rotate_mode { - /* Note the enum values correspond to BAM value */ - IPU_ROTATE_NONE = 0, - IPU_ROTATE_VERT_FLIP = 1, - IPU_ROTATE_HORIZ_FLIP = 2, - IPU_ROTATE_180 = 3, - IPU_ROTATE_90_RIGHT = 4, - IPU_ROTATE_90_RIGHT_VFLIP = 5, - IPU_ROTATE_90_RIGHT_HFLIP = 6, - IPU_ROTATE_90_LEFT = 7, -}; - -/* - * Enumeration of DI ports for ADC. - */ -enum display_port { - DISP0, - DISP1, - DISP2, - DISP3 -}; - -struct idmac_video_param { - unsigned short in_width; - unsigned short in_height; - uint32_t in_pixel_fmt; - unsigned short out_width; - unsigned short out_height; - uint32_t out_pixel_fmt; - unsigned short out_stride; - bool graphics_combine_en; - bool global_alpha_en; - bool key_color_en; - enum display_port disp; - unsigned short out_left; - unsigned short out_top; -}; - -/* - * Union of initialization parameters for a logical channel. So far only video - * parameters are used. - */ -union ipu_channel_param { - struct idmac_video_param video; -}; - -struct idmac_tx_desc { - struct dma_async_tx_descriptor txd; - struct scatterlist *sg; /* scatterlist for this */ - unsigned int sg_len; /* tx-descriptor. */ - struct list_head list; -}; - -struct idmac_channel { - struct dma_chan dma_chan; - dma_cookie_t completed; /* last completed cookie */ - union ipu_channel_param params; - enum ipu_channel link; /* input channel, linked to the output */ - enum ipu_channel_status status; - void *client; /* Only one client per channel */ - unsigned int n_tx_desc; - struct idmac_tx_desc *desc; /* allocated tx-descriptors */ - struct scatterlist *sg[2]; /* scatterlist elements in buffer-0 and -1 */ - struct list_head free_list; /* free tx-descriptors */ - struct list_head queue; /* queued tx-descriptors */ - spinlock_t lock; /* protects sg[0,1], queue */ - struct mutex chan_mutex; /* protects status, cookie, free_list */ - bool sec_chan_en; - int active_buffer; - unsigned int eof_irq; - char eof_name[16]; /* EOF IRQ name for request_irq() */ -}; - -#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd) -#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan) - -#endif /* __LINUX_DMA_IPU_DMA_H */ -- cgit v1.2.3 From 73d5fc92a11cacb73a1aac0b5793c47e48c5b537 Mon Sep 17 00:00:00 2001 From: Nishad Saraf Date: Mon, 19 Aug 2024 14:19:48 -0700 Subject: dmaengine: amd: qdma: Add AMD QDMA driver Adds driver to enable PCIe board which uses AMD QDMA (the Queue-based Direct Memory Access) subsystem. For example, Xilinx Alveo V70 AI Accelerator devices. https://www.xilinx.com/applications/data-center/v70.html The QDMA subsystem is used in conjunction with the PCI Express IP block to provide high performance data transfer between host memory and the card's DMA subsystem. 
+-------+ +-------+ +-----------+ PCIe | | | | | | Tx/Rx | | | | AXI | | <=======> | PCIE | <===> | QDMA | <====>| User Logic| | | | | | | +-------+ +-------+ +-----------+ The primary mechanism to transfer data using the QDMA is for the QDMA engine to operate on instructions (descriptors) provided by the host operating system. Using the descriptors, the QDMA can move data in both the Host to Card (H2C) direction, or the Card to Host (C2H) direction. The QDMA provides a per-queue basis option whether DMA traffic goes to an AXI4 memory map (MM) interface or to an AXI4-Stream interface. The hardware detail is provided by https://docs.xilinx.com/r/en-US/pg302-qdma Implements dmaengine APIs to support MM DMA transfers. - probe the available DMA channels - use dma_slave_map for channel lookup - use virtual channel to manage dmaengine tx descriptors - implement device_prep_slave_sg callback to handle host scatter gather list Signed-off-by: Nishad Saraf Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20240819211948.688786-2-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/platform_data/amd_qdma.h | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/platform_data/amd_qdma.h (limited to 'include') diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h new file mode 100644 index 000000000000..576d952f97ed --- /dev/null +++ b/include/linux/platform_data/amd_qdma.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _PLATDATA_AMD_QDMA_H +#define _PLATDATA_AMD_QDMA_H + +#include + +/** + * struct qdma_queue_info - DMA queue information. This information is used to + * match queue when DMA channel is requested + * @dir: Channel transfer direction + */ +struct qdma_queue_info { + enum dma_transfer_direction dir; +}; + +#define QDMA_FILTER_PARAM(qinfo) ((void *)(qinfo)) + +struct dma_slave_map; + +/** + * struct qdma_platdata - Platform specific data for QDMA engine + * @max_mm_channels: Maximum number of MM DMA channels in each direction + * @device_map: DMA slave map + * @irq_index: The index of first IRQ + */ +struct qdma_platdata { + u32 max_mm_channels; + u32 irq_index; + struct dma_slave_map *device_map; +}; + +#endif /* _PLATDATA_AMD_QDMA_H */ -- cgit v1.2.3 From 4bb59323450db610b06c549fa4d5936e94fb9a36 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 10 Aug 2024 17:45:40 +0800 Subject: dmaengine: ti: k3-udma: Remove unused declarations Commit d70241913413 ("dmaengine: ti: k3-udma: Add glue layer for non DMAengine users") declared but never implemented these. 
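The AMD QDMA patch above exposes its MM queues through the standard dmaengine slave API (dma_slave_map lookup plus a device_prep_slave_sg callback), so a client driver drives it like any other slave-DMA provider. Below is a minimal, hedged consumer sketch; the channel name "h2c-queue0" and the example function are assumptions for illustration and are not defined by the patch.

/* Hedged consumer sketch for a dmaengine slave channel such as the ones
 * the AMD QDMA driver registers; "h2c-queue0" is an assumed example name.
 */
#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_qdma_sg_xfer(struct device *dev, struct scatterlist *sgl,
				unsigned int sg_len)
{
	struct dma_async_tx_descriptor *desc;
	struct dma_chan *chan;
	dma_cookie_t cookie;

	chan = dma_request_chan(dev, "h2c-queue0");	/* resolved via dma_slave_map */
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV,
				       DMA_PREP_INTERRUPT);
	if (!desc) {
		dma_release_channel(chan);
		return -EIO;
	}

	cookie = dmaengine_submit(desc);		/* queue the descriptor */
	if (dma_submit_error(cookie)) {
		dma_release_channel(chan);
		return -EIO;
	}
	dma_async_issue_pending(chan);			/* start the transfer */

	/* Completion handling (callback or dma_sync_wait()) and channel
	 * release are omitted from this sketch.
	 */
	return 0;
}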
Signed-off-by: Yue Haibing Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240810094540.2589310-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 1e491c5dcac2..2dea217629d0 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -136,8 +136,6 @@ u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn, u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); -void k3_udma_glue_rx_put_irq(struct k3_udma_glue_rx_channel *rx_chn, - u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, void (*cleanup)(void *data, dma_addr_t desc_dma), -- cgit v1.2.3 From fb234516c5a0728d7dbd718667c33c1523b55fe8 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 26 Aug 2024 18:39:46 +0200 Subject: dt-bindings: clock: rockchip: remove CLK_NR_CLKS and CLKPMU_NR_CLKS CLK_NR_CLKS and CLKPMU_NR_CLKS should not be part of the binding. Remove since the kernel code no longer uses it. Signed-off-by: Johan Jonker Acked-by: Conor Dooley Link: https://lore.kernel.org/r/a3292ed0-3489-4887-8567-40ea4983c592@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/px30-cru.h | 4 ---- include/dt-bindings/clock/rk3036-cru.h | 2 -- include/dt-bindings/clock/rk3228-cru.h | 2 -- include/dt-bindings/clock/rk3288-cru.h | 2 -- include/dt-bindings/clock/rk3308-cru.h | 2 -- include/dt-bindings/clock/rk3328-cru.h | 2 -- include/dt-bindings/clock/rk3368-cru.h | 2 -- include/dt-bindings/clock/rk3399-cru.h | 4 ---- 8 files changed, 20 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/px30-cru.h b/include/dt-bindings/clock/px30-cru.h index 5b1416fcde6f..a2abf1995c34 100644 --- a/include/dt-bindings/clock/px30-cru.h +++ b/include/dt-bindings/clock/px30-cru.h @@ -175,8 +175,6 @@ #define PCLK_CIF 352 #define PCLK_OTP_PHY 353 -#define CLK_NR_CLKS (PCLK_OTP_PHY + 1) - /* pmu-clocks indices */ #define PLL_GPLL 1 @@ -195,8 +193,6 @@ #define PCLK_GPIO0_PMU 20 #define PCLK_UART0_PMU 21 -#define CLKPMU_NR_CLKS (PCLK_UART0_PMU + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3036-cru.h b/include/dt-bindings/clock/rk3036-cru.h index a96a9870ad59..99cc617e1e54 100644 --- a/include/dt-bindings/clock/rk3036-cru.h +++ b/include/dt-bindings/clock/rk3036-cru.h @@ -94,8 +94,6 @@ #define HCLK_CPU 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE0 0 #define SRST_CORE1 1 diff --git a/include/dt-bindings/clock/rk3228-cru.h b/include/dt-bindings/clock/rk3228-cru.h index de550ea56eeb..138b6ce514dd 100644 --- a/include/dt-bindings/clock/rk3228-cru.h +++ b/include/dt-bindings/clock/rk3228-cru.h @@ -146,8 +146,6 @@ #define HCLK_S_CRYPTO 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3288-cru.h b/include/dt-bindings/clock/rk3288-cru.h index 33819acbfc56..c6034b01b050 100644 --- a/include/dt-bindings/clock/rk3288-cru.h +++ b/include/dt-bindings/clock/rk3288-cru.h @@ -195,8 +195,6 @@ #define HCLK_CPU 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* 
soft-reset indices */ #define SRST_CORE0 0 #define SRST_CORE1 1 diff --git a/include/dt-bindings/clock/rk3308-cru.h b/include/dt-bindings/clock/rk3308-cru.h index d97840f9ee2e..ce4cd72b9d3d 100644 --- a/include/dt-bindings/clock/rk3308-cru.h +++ b/include/dt-bindings/clock/rk3308-cru.h @@ -212,8 +212,6 @@ #define PCLK_CAN 233 #define PCLK_OWIRE 234 -#define CLK_NR_CLKS (PCLK_OWIRE + 1) - /* soft-reset indices */ /* cru_softrst_con0 */ diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h index 555b4ff660ae..8885a2e98c65 100644 --- a/include/dt-bindings/clock/rk3328-cru.h +++ b/include/dt-bindings/clock/rk3328-cru.h @@ -201,8 +201,6 @@ #define HCLK_RGA 340 #define HCLK_HDCP 341 -#define CLK_NR_CLKS (HCLK_HDCP + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h index 83c72a163fd3..ebae3cbf8192 100644 --- a/include/dt-bindings/clock/rk3368-cru.h +++ b/include/dt-bindings/clock/rk3368-cru.h @@ -182,8 +182,6 @@ #define HCLK_BUS 477 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE_B0 0 #define SRST_CORE_B1 1 diff --git a/include/dt-bindings/clock/rk3399-cru.h b/include/dt-bindings/clock/rk3399-cru.h index 39169d94a44e..4c90c7703a83 100644 --- a/include/dt-bindings/clock/rk3399-cru.h +++ b/include/dt-bindings/clock/rk3399-cru.h @@ -335,8 +335,6 @@ #define HCLK_SDIO_NOC 495 #define HCLK_SDIOAUDIO_NOC 496 -#define CLK_NR_CLKS (HCLK_SDIOAUDIO_NOC + 1) - /* pmu-clocks indices */ #define PLL_PPLL 1 @@ -378,8 +376,6 @@ #define PCLK_INTR_ARB_PMU 49 #define HCLK_NOC_PMU 50 -#define CLKPMU_NR_CLKS (HCLK_NOC_PMU + 1) - /* soft-reset indices */ /* cru_softrst_con0 */ -- cgit v1.2.3 From 41845541adebc503b8574943c92670016d5e566b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:18 +0800 Subject: firmware: arm_scmi: Add initial support for i.MX BBM protocol i.MX95 has a battery-backed module(BBM), which has persistent storage (GPR), an RTC, and the ON/OFF button. The System Manager(SM) firmware use SCMI vendor protocol(SCMI BBM) to let agent be able to use GPR, RTC and ON/OFF button. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-2-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/scmi_imx_protocol.h (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h new file mode 100644 index 000000000000..2df2ea0f1809 --- /dev/null +++ b/include/linux/scmi_imx_protocol.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SCMI Message Protocol driver NXP extension header + * + * Copyright 2024 NXP. 
+ */ + +#ifndef _LINUX_SCMI_NXP_PROTOCOL_H +#define _LINUX_SCMI_NXP_PROTOCOL_H + +#include +#include +#include +#include + +enum scmi_nxp_protocol { + SCMI_PROTOCOL_IMX_BBM = 0x81, +}; + +struct scmi_imx_bbm_proto_ops { + int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, + uint64_t sec); + int (*rtc_time_get)(const struct scmi_protocol_handle *ph, u32 id, + u64 *val); + int (*rtc_alarm_set)(const struct scmi_protocol_handle *ph, u32 id, + bool enable, u64 sec); + int (*button_get)(const struct scmi_protocol_handle *ph, u32 *state); +}; + +enum scmi_nxp_notification_events { + SCMI_EVENT_IMX_BBM_RTC = 0x0, + SCMI_EVENT_IMX_BBM_BUTTON = 0x1, +}; + +struct scmi_imx_bbm_notif_report { + bool is_rtc; + bool is_button; + ktime_t timestamp; + unsigned int rtc_id; + unsigned int rtc_evt; +}; +#endif -- cgit v1.2.3 From 61c9f03e22fc57fe61726c513b1f92c0ed1ef00f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:19 +0800 Subject: firmware: arm_scmi: Add initial support for i.MX MISC protocol The i.MX95 System Manager (SM) firmware includes an SCMI vendor protocol, the SCMI MISC protocol, which provides controls for miscellaneous settings/actions that must be exposed from the SM to agents. They are device specific and are usually defined to access bit fields in various mix block control modules, IOMUX_GPR, and other General Purpose Registers and Control Status Registers owned by the SM. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-3-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 2df2ea0f1809..066216f1357a 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -15,6 +15,7 @@ enum scmi_nxp_protocol { SCMI_PROTOCOL_IMX_BBM = 0x81, + SCMI_PROTOCOL_IMX_MISC = 0x84, }; struct scmi_imx_bbm_proto_ops { @@ -30,6 +31,7 @@ struct scmi_imx_bbm_proto_ops { enum scmi_nxp_notification_events { SCMI_EVENT_IMX_BBM_RTC = 0x0, SCMI_EVENT_IMX_BBM_BUTTON = 0x1, + SCMI_EVENT_IMX_MISC_CONTROL = 0x0, }; struct scmi_imx_bbm_notif_report { @@ -39,4 +41,19 @@ struct scmi_imx_bbm_notif_report { unsigned int rtc_id; unsigned int rtc_evt; }; + +struct scmi_imx_misc_ctrl_notify_report { + ktime_t timestamp; + unsigned int ctrl_id; + unsigned int flags; +}; + +struct scmi_imx_misc_proto_ops { + int (*misc_ctrl_set)(const struct scmi_protocol_handle *ph, u32 id, + u32 num, u32 *val); + int (*misc_ctrl_get)(const struct scmi_protocol_handle *ph, u32 id, + u32 *num, u32 *val); + int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, + u32 ctrl_id, u32 evt_id, u32 flags); +}; #endif -- cgit v1.2.3 From 0b4f8a68b292e7ee82107b1ce15c3aad31c864b1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 23 Aug 2024 17:05:21 +0800 Subject: firmware: imx: Add i.MX95 MISC driver The i.MX95 System Manager exports the SCMI MISC protocol so that Linux can apply various settings, such as configuring a board GPIO expander as a wakeup source. Add a driver to support this.
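For context, the sm.h header added in the diff below gives other in-kernel drivers direct accessors for these controls. A minimal, hedged caller sketch follows; the value written here is a made-up placeholder, not a documented i.MX95 setting.

/* Hedged caller sketch: the 0x1 value is a placeholder, not a documented
 * MQS1 layout; the control word count is assumed to be one.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/firmware/imx/sm.h>

static int example_mqs1_setup(void)
{
	u32 num = 1, val = 0;
	int ret;

	/* Read the current MQS1 control word through the System Manager. */
	ret = scmi_imx_misc_ctrl_get(SCMI_IMX_CTRL_MQS1_SETTINGS, &num, &val);
	if (ret)
		return ret;	/* -EOPNOTSUPP if CONFIG_IMX_SCMI_MISC_EXT is disabled */

	/* Write an example value back. */
	return scmi_imx_misc_ctrl_set(SCMI_IMX_CTRL_MQS1_SETTINGS, 0x1);
}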
Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20240823-imx95-bbm-misc-v2-v8-5-e600ed9e9271@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/firmware/imx/sm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/firmware/imx/sm.h (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h new file mode 100644 index 000000000000..62a2690e2abd --- /dev/null +++ b/include/linux/firmware/imx/sm.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2024 NXP + */ + +#ifndef _SCMI_IMX_H +#define _SCMI_IMX_H + +#include +#include +#include + +#define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ +#define SCMI_IMX_CTRL_MQS1_SETTINGS 1 /* AON MQS settings */ +#define SCMI_IMX_CTRL_SAI1_MCLK 2 /* AON SAI1 MCLK */ +#define SCMI_IMX_CTRL_SAI3_MCLK 3 /* WAKE SAI3 MCLK */ +#define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ +#define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ + +#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_EXT) +int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); +int scmi_imx_misc_ctrl_set(u32 id, u32 val); +#else +static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) +{ + return -EOPNOTSUPP; +} +#endif +#endif -- cgit v1.2.3 From 3e3ac9c39e1b575e970ecab90504b6cb090f6b05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2024 01:52:49 +0000 Subject: tcp: remove volatile qualifier on tw_substate Using a volatile qualifier for a specific struct field is unusual. Use instead READ_ONCE()/WRITE_ONCE() where necessary. tcp_timewait_state_process() can change tw_substate while other threads are reading this field. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20240827015250.3509197-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_timewait_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f88b68269012..beb533a0e880 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -58,7 +58,7 @@ struct inet_timewait_sock { #define tw_dr __tw_common.skc_tw_dr __u32 tw_mark; - volatile unsigned char tw_substate; + unsigned char tw_substate; unsigned char tw_rcv_wscale; /* Socket demultiplex comparisons on incoming packets. */ -- cgit v1.2.3 From 0870b0d8b393dde53106678a1e2cec9dfa52f9b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2024 11:49:16 +0000 Subject: net: busy-poll: use ktime_get_ns() instead of local_clock() Typically, busy-polling durations are below 100 usec. When/if the busy-poller thread migrates to another cpu, local_clock() can be off by +/-2msec or more for small values of HZ, depending on the platform. Use ktime_get_ns() to ensure deterministic behavior, which is the whole point of busy-polling.
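For context, busy polling is normally opted into per socket from userspace, which is where the sub-100-usec budgets mentioned above come from. A small hedged example (the 50 usec value is arbitrary):

/* Userspace sketch: request ~50 usec of busy polling per blocking receive.
 * The value is an arbitrary example, not a recommendation.
 */
#include <stdio.h>
#include <sys/socket.h>

static int enable_busy_poll(int fd)
{
	unsigned int usec = 50;

	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usec, sizeof(usec)) < 0) {
		perror("setsockopt(SO_BUSY_POLL)");
		return -1;
	}
	return 0;
}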
Fixes: 060212928670 ("net: add low latency socket poll") Fixes: 9a3c71aa8024 ("net: convert low latency sockets to sched_clock()") Fixes: 37089834528b ("sched, net: Fixup busy_loop_us_clock()") Signed-off-by: Eric Dumazet Cc: Mina Almasry Cc: Willem de Bruijn Reviewed-by: Joe Damato Link: https://patch.msgid.link/20240827114916.223377-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..522f1da8b747 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -68,7 +68,7 @@ static inline bool sk_can_busy_loop(struct sock *sk) static inline unsigned long busy_loop_current_time(void) { #ifdef CONFIG_NET_RX_BUSY_POLL - return (unsigned long)(local_clock() >> 10); + return (unsigned long)(ktime_get_ns() >> 10); #else return 0; #endif -- cgit v1.2.3 From 3e3ac9c39e1b575e970ecab90504b6cb090f6b05 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:20:05 +0800 Subject: scsi: core: Remove obsoleted declaration for scsi_driverbyte_string() scsi_driverbyte_string() has been unused since commit 54c29086195f ("scsi: core: Drop the now obsolete driver_byte definitions"). Remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826032005.4007834-1-cuigaosheng1@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_dbg.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_dbg.h b/include/scsi/scsi_dbg.h index 7b196d234626..bd29cdb513a5 100644 --- a/include/scsi/scsi_dbg.h +++ b/include/scsi/scsi_dbg.h @@ -24,7 +24,6 @@ extern const char *scsi_extd_sense_format(unsigned char, unsigned char, const char **); extern const char *scsi_mlreturn_string(int); extern const char *scsi_hostbyte_string(int); -extern const char *scsi_driverbyte_string(int); #else static inline bool scsi_opcode_sa_name(int cmd, int sa, @@ -76,12 +75,6 @@ scsi_hostbyte_string(int result) return NULL; } -static inline const char * -scsi_driverbyte_string(int result) -{ - return NULL; -} - #endif #endif /* _SCSI_SCSI_DBG_H */ -- cgit v1.2.3 From c42a01264ba1497eb3193c08ff3c2656d98250a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:10:24 +0200 Subject: dma-mapping: don't return errors from dma_set_min_align_mask A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen --- include/linux/dma-mapping.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f693aafe221f..cfd6bafec3f9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -575,13 +575,12 @@ static inline unsigned int dma_get_min_align_mask(struct device *dev) return 0; } -static inline int dma_set_min_align_mask(struct device *dev, +static inline void dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) - return -EIO; + return; dev->dma_parms->min_align_mask = min_align_mask; - return 0; } #ifndef dma_get_cache_alignment -- cgit v1.2.3 From 560a861ab4174b42240157ab5cebe36b8c7bc418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:06:28 +0200 Subject: dma-mapping: don't return errors from dma_set_seg_boundary A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. Petersen --- include/linux/dma-mapping.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cfd6bafec3f9..6bd1333dbacb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -559,13 +559,11 @@ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, return (dma_get_seg_boundary(dev) >> page_shift) + 1; } -static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) +static inline void dma_set_seg_boundary(struct device *dev, unsigned long mask) { - if (dev->dma_parms) { - dev->dma_parms->segment_boundary_mask = mask; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->segment_boundary_mask = mask; } static inline unsigned int dma_get_min_align_mask(struct device *dev) -- cgit v1.2.3 From 334304ac2baca7f3e821c47cf5129d90e7a6b1e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:07:38 +0200 Subject: dma-mapping: don't return errors from dma_set_max_seg_size A NULL dev->dma_parms indicates either a bus that is not DMA capable or grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen Acked-by: Ulf Hansson # For MMC --- include/linux/dma-mapping.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6bd1333dbacb..1524da363734 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -524,13 +524,11 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) +static inline void dma_set_max_seg_size(struct device *dev, unsigned int size) { - if (dev->dma_parms) { - dev->dma_parms->max_segment_size = size; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->max_segment_size = size; } static inline unsigned long dma_get_seg_boundary(struct device *dev) -- cgit v1.2.3 From b4f62001ccd3fa953769ccbd313c9a7a4f5f8f3d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 29 Aug 2024 09:18:47 +0800 Subject: dt-bindings: clock: add RMII clock selection Add RMII clock selection for ENETC0 and ENETC1. Signed-off-by: Wei Fang Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240829011849.364987-3-wei.fang@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/nxp,imx95-clock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/nxp,imx95-clock.h b/include/dt-bindings/clock/nxp,imx95-clock.h index 782662c3e740..b7a713a9ac8c 100644 --- a/include/dt-bindings/clock/nxp,imx95-clock.h +++ b/include/dt-bindings/clock/nxp,imx95-clock.h @@ -25,4 +25,7 @@ #define IMX95_CLK_DISPMIX_ENG0_SEL 0 #define IMX95_CLK_DISPMIX_ENG1_SEL 1 +#define IMX95_CLK_NETCMIX_ENETC0_RMII 0 +#define IMX95_CLK_NETCMIX_ENETC1_RMII 1 + #endif /* __DT_BINDINGS_CLOCK_IMX95_H */ -- cgit v1.2.3 From 33929707b808ba7839c40c15d3e68cbc51070b31 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 15:23:10 +0300 Subject: drm/mst: switch to guid_t type for GUID The kernel has a guid_t type for GUIDs. Switch to using it, but avoid any functional changes here. 
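The practical benefit of guid_t is that the generic helpers from include/linux/uuid.h replace open-coded memcpy()/memcmp() on raw u8[16] buffers. A hedged sketch follows; the variable names are illustrative and not taken from the MST helpers.

/* Illustrative only: shows the uuid.h helpers the guid_t conversion enables. */
#include <linux/uuid.h>

static bool example_update_guid(guid_t *stored, const u8 raw[16])
{
	guid_t incoming;

	import_guid(&incoming, raw);		/* raw 16-byte sideband buffer -> guid_t */

	if (guid_equal(stored, &incoming))	/* instead of memcmp(..., 16) */
		return false;

	guid_copy(stored, &incoming);		/* instead of memcpy(..., 16) */
	return true;				/* stored GUID was updated */
}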
Reviewed-by: Daniel Vetter Acked-by: Harry Wentland Acked-by: Alex Deucher Acked-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240812122312.1567046-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/display/drm_dp_mst_helper.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..dd466631f174 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -244,18 +244,18 @@ struct drm_dp_mst_branch { bool link_address_sent; /* global unique identifier to identify branch devices */ - u8 guid[16]; + guid_t guid; }; struct drm_dp_nak_reply { - u8 guid[16]; + guid_t guid; u8 reason; u8 nak_data; }; struct drm_dp_link_address_ack_reply { - u8 guid[16]; + guid_t guid; u8 nports; struct drm_dp_link_addr_reply_port { bool input_port; @@ -265,7 +265,7 @@ struct drm_dp_link_address_ack_reply { bool ddps; bool legacy_device_plug_status; u8 dpcd_revision; - u8 peer_guid[16]; + guid_t peer_guid; u8 num_sdp_streams; u8 num_sdp_stream_sinks; } ports[16]; @@ -348,7 +348,7 @@ struct drm_dp_allocate_payload_ack_reply { }; struct drm_dp_connection_status_notify { - u8 guid[16]; + guid_t guid; u8 port_number; bool legacy_device_plug_status; bool displayport_device_plug_status; @@ -425,7 +425,7 @@ struct drm_dp_query_payload { struct drm_dp_resource_status_notify { u8 port_number; - u8 guid[16]; + guid_t guid; u16 available_pbn; }; -- cgit v1.2.3 From 49c04453db81fc806906e26ef9fc53bdb635ff39 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Wed, 28 Aug 2024 15:42:50 +0000 Subject: dt-bindings: clock, reset: Add support for rk3576 Add clock and reset ID defines for rk3576. Compared to the downstream bindings written by Elaine, this uses continous gapless IDs starting at 0. Thus all numbers are different between downstream and upstream, but names are kept exactly the same. Also add documentation for the rk3576 CRU core. Signed-off-by: Elaine Zhang Signed-off-by: Sugar Zhang Signed-off-by: Detlev Casanova Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/0102019199a76766-f3a2b53f-d063-458b-b0d1-dfbc2ea1893c-000000@eu-west-1.amazonses.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rockchip,rk3576-cru.h | 592 ++++++++++++++++++++++++ include/dt-bindings/reset/rockchip,rk3576-cru.h | 564 ++++++++++++++++++++++ 2 files changed, 1156 insertions(+) create mode 100644 include/dt-bindings/clock/rockchip,rk3576-cru.h create mode 100644 include/dt-bindings/reset/rockchip,rk3576-cru.h (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3576-cru.h b/include/dt-bindings/clock/rockchip,rk3576-cru.h new file mode 100644 index 000000000000..a2933021be89 --- /dev/null +++ b/include/dt-bindings/clock/rockchip,rk3576-cru.h @@ -0,0 +1,592 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* +* Copyright (c) 2023 Rockchip Electronics Co. Ltd. +* Copyright (c) 2024 Collabora Ltd. 
+* +* Author: Elaine Zhang +* Author: Detlev Casanova +*/ + +#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H +#define _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H + +/* cru-clocks indices */ + +/* cru plls */ +#define PLL_BPLL 0 +#define PLL_LPLL 1 +#define PLL_VPLL 2 +#define PLL_AUPLL 3 +#define PLL_CPLL 4 +#define PLL_GPLL 5 +#define PLL_PPLL 6 +#define ARMCLK_L 7 +#define ARMCLK_B 8 + +/* cru clocks */ +#define CLK_CPLL_DIV20 9 +#define CLK_CPLL_DIV10 10 +#define CLK_GPLL_DIV8 11 +#define CLK_GPLL_DIV6 12 +#define CLK_CPLL_DIV4 13 +#define CLK_GPLL_DIV4 14 +#define CLK_SPLL_DIV2 15 +#define CLK_GPLL_DIV3 16 +#define CLK_CPLL_DIV2 17 +#define CLK_GPLL_DIV2 18 +#define CLK_SPLL_DIV1 19 +#define PCLK_TOP_ROOT 20 +#define ACLK_TOP 21 +#define HCLK_TOP 22 +#define CLK_AUDIO_FRAC_0 23 +#define CLK_AUDIO_FRAC_1 24 +#define CLK_AUDIO_FRAC_2 25 +#define CLK_AUDIO_FRAC_3 26 +#define CLK_UART_FRAC_0 27 +#define CLK_UART_FRAC_1 28 +#define CLK_UART_FRAC_2 29 +#define CLK_UART1_SRC_TOP 30 +#define CLK_AUDIO_INT_0 31 +#define CLK_AUDIO_INT_1 32 +#define CLK_AUDIO_INT_2 33 +#define CLK_PDM0_SRC_TOP 34 +#define CLK_PDM1_OUT 35 +#define CLK_GMAC0_125M_SRC 36 +#define CLK_GMAC1_125M_SRC 37 +#define LCLK_ASRC_SRC_0 38 +#define LCLK_ASRC_SRC_1 39 +#define REF_CLK0_OUT_PLL 40 +#define REF_CLK1_OUT_PLL 41 +#define REF_CLK2_OUT_PLL 42 +#define REFCLKO25M_GMAC0_OUT 43 +#define REFCLKO25M_GMAC1_OUT 44 +#define CLK_CIFOUT_OUT 45 +#define CLK_GMAC0_RMII_CRU 46 +#define CLK_GMAC1_RMII_CRU 47 +#define CLK_OTPC_AUTO_RD_G 48 +#define CLK_OTP_PHY_G 49 +#define CLK_MIPI_CAMERAOUT_M0 50 +#define CLK_MIPI_CAMERAOUT_M1 51 +#define CLK_MIPI_CAMERAOUT_M2 52 +#define MCLK_PDM0_SRC_TOP 53 +#define HCLK_AUDIO_ROOT 54 +#define HCLK_ASRC_2CH_0 55 +#define HCLK_ASRC_2CH_1 56 +#define HCLK_ASRC_4CH_0 57 +#define HCLK_ASRC_4CH_1 58 +#define CLK_ASRC_2CH_0 59 +#define CLK_ASRC_2CH_1 60 +#define CLK_ASRC_4CH_0 61 +#define CLK_ASRC_4CH_1 62 +#define MCLK_SAI0_8CH_SRC 63 +#define MCLK_SAI0_8CH 64 +#define HCLK_SAI0_8CH 65 +#define HCLK_SPDIF_RX0 66 +#define MCLK_SPDIF_RX0 67 +#define HCLK_SPDIF_RX1 68 +#define MCLK_SPDIF_RX1 69 +#define MCLK_SAI1_8CH_SRC 70 +#define MCLK_SAI1_8CH 71 +#define HCLK_SAI1_8CH 72 +#define MCLK_SAI2_2CH_SRC 73 +#define MCLK_SAI2_2CH 74 +#define HCLK_SAI2_2CH 75 +#define MCLK_SAI3_2CH_SRC 76 +#define MCLK_SAI3_2CH 77 +#define HCLK_SAI3_2CH 78 +#define MCLK_SAI4_2CH_SRC 79 +#define MCLK_SAI4_2CH 80 +#define HCLK_SAI4_2CH 81 +#define HCLK_ACDCDIG_DSM 82 +#define MCLK_ACDCDIG_DSM 83 +#define CLK_PDM1 84 +#define HCLK_PDM1 85 +#define MCLK_PDM1 86 +#define HCLK_SPDIF_TX0 87 +#define MCLK_SPDIF_TX0 88 +#define HCLK_SPDIF_TX1 89 +#define MCLK_SPDIF_TX1 90 +#define CLK_SAI1_MCLKOUT 91 +#define CLK_SAI2_MCLKOUT 92 +#define CLK_SAI3_MCLKOUT 93 +#define CLK_SAI4_MCLKOUT 94 +#define CLK_SAI0_MCLKOUT 95 +#define HCLK_BUS_ROOT 96 +#define PCLK_BUS_ROOT 97 +#define ACLK_BUS_ROOT 98 +#define HCLK_CAN0 99 +#define CLK_CAN0 100 +#define HCLK_CAN1 101 +#define CLK_CAN1 102 +#define CLK_KEY_SHIFT 103 +#define PCLK_I2C1 104 +#define PCLK_I2C2 105 +#define PCLK_I2C3 106 +#define PCLK_I2C4 107 +#define PCLK_I2C5 108 +#define PCLK_I2C6 109 +#define PCLK_I2C7 110 +#define PCLK_I2C8 111 +#define PCLK_I2C9 112 +#define PCLK_WDT_BUSMCU 113 +#define TCLK_WDT_BUSMCU 114 +#define ACLK_GIC 115 +#define CLK_I2C1 116 +#define CLK_I2C2 117 +#define CLK_I2C3 118 +#define CLK_I2C4 119 +#define CLK_I2C5 120 +#define CLK_I2C6 121 +#define CLK_I2C7 122 +#define CLK_I2C8 123 +#define CLK_I2C9 124 +#define PCLK_SARADC 125 +#define CLK_SARADC 126 +#define 
PCLK_TSADC 127 +#define CLK_TSADC 128 +#define PCLK_UART0 129 +#define PCLK_UART2 130 +#define PCLK_UART3 131 +#define PCLK_UART4 132 +#define PCLK_UART5 133 +#define PCLK_UART6 134 +#define PCLK_UART7 135 +#define PCLK_UART8 136 +#define PCLK_UART9 137 +#define PCLK_UART10 138 +#define PCLK_UART11 139 +#define SCLK_UART0 140 +#define SCLK_UART2 141 +#define SCLK_UART3 142 +#define SCLK_UART4 143 +#define SCLK_UART5 144 +#define SCLK_UART6 145 +#define SCLK_UART7 146 +#define SCLK_UART8 147 +#define SCLK_UART9 148 +#define SCLK_UART10 149 +#define SCLK_UART11 150 +#define PCLK_SPI0 151 +#define PCLK_SPI1 152 +#define PCLK_SPI2 153 +#define PCLK_SPI3 154 +#define PCLK_SPI4 155 +#define CLK_SPI0 156 +#define CLK_SPI1 157 +#define CLK_SPI2 158 +#define CLK_SPI3 159 +#define CLK_SPI4 160 +#define PCLK_WDT0 161 +#define TCLK_WDT0 162 +#define PCLK_PWM1 163 +#define CLK_PWM1 164 +#define CLK_OSC_PWM1 165 +#define CLK_RC_PWM1 166 +#define PCLK_BUSTIMER0 167 +#define PCLK_BUSTIMER1 168 +#define CLK_TIMER0_ROOT 169 +#define CLK_TIMER0 170 +#define CLK_TIMER1 171 +#define CLK_TIMER2 172 +#define CLK_TIMER3 173 +#define CLK_TIMER4 174 +#define CLK_TIMER5 175 +#define PCLK_MAILBOX0 176 +#define PCLK_GPIO1 177 +#define DBCLK_GPIO1 178 +#define PCLK_GPIO2 179 +#define DBCLK_GPIO2 180 +#define PCLK_GPIO3 181 +#define DBCLK_GPIO3 182 +#define PCLK_GPIO4 183 +#define DBCLK_GPIO4 184 +#define ACLK_DECOM 185 +#define PCLK_DECOM 186 +#define DCLK_DECOM 187 +#define CLK_TIMER1_ROOT 188 +#define CLK_TIMER6 189 +#define CLK_TIMER7 190 +#define CLK_TIMER8 191 +#define CLK_TIMER9 192 +#define CLK_TIMER10 193 +#define CLK_TIMER11 194 +#define ACLK_DMAC0 195 +#define ACLK_DMAC1 196 +#define ACLK_DMAC2 197 +#define ACLK_SPINLOCK 198 +#define HCLK_I3C0 199 +#define HCLK_I3C1 200 +#define HCLK_BUS_CM0_ROOT 201 +#define FCLK_BUS_CM0_CORE 202 +#define CLK_BUS_CM0_RTC 203 +#define PCLK_PMU2 204 +#define PCLK_PWM2 205 +#define CLK_PWM2 206 +#define CLK_RC_PWM2 207 +#define CLK_OSC_PWM2 208 +#define CLK_FREQ_PWM1 209 +#define CLK_COUNTER_PWM1 210 +#define SAI_SCLKIN_FREQ 211 +#define SAI_SCLKIN_COUNTER 212 +#define CLK_I3C0 213 +#define CLK_I3C1 214 +#define PCLK_CSIDPHY1 215 +#define PCLK_DDR_ROOT 216 +#define PCLK_DDR_MON_CH0 217 +#define TMCLK_DDR_MON_CH0 218 +#define ACLK_DDR_ROOT 219 +#define HCLK_DDR_ROOT 220 +#define FCLK_DDR_CM0_CORE 221 +#define CLK_DDR_TIMER_ROOT 222 +#define CLK_DDR_TIMER0 223 +#define CLK_DDR_TIMER1 224 +#define TCLK_WDT_DDR 225 +#define PCLK_WDT 226 +#define PCLK_TIMER 227 +#define CLK_DDR_CM0_RTC 228 +#define ACLK_RKNN0 229 +#define ACLK_RKNN1 230 +#define HCLK_RKNN_ROOT 231 +#define CLK_RKNN_DSU0 232 +#define PCLK_NPUTOP_ROOT 233 +#define PCLK_NPU_TIMER 234 +#define CLK_NPUTIMER_ROOT 235 +#define CLK_NPUTIMER0 236 +#define CLK_NPUTIMER1 237 +#define PCLK_NPU_WDT 238 +#define TCLK_NPU_WDT 239 +#define ACLK_RKNN_CBUF 240 +#define HCLK_NPU_CM0_ROOT 241 +#define FCLK_NPU_CM0_CORE 242 +#define CLK_NPU_CM0_RTC 243 +#define HCLK_RKNN_CBUF 244 +#define HCLK_NVM_ROOT 245 +#define ACLK_NVM_ROOT 246 +#define SCLK_FSPI_X2 247 +#define HCLK_FSPI 248 +#define CCLK_SRC_EMMC 249 +#define HCLK_EMMC 250 +#define ACLK_EMMC 251 +#define BCLK_EMMC 252 +#define TCLK_EMMC 253 +#define PCLK_PHP_ROOT 254 +#define ACLK_PHP_ROOT 255 +#define PCLK_PCIE0 256 +#define CLK_PCIE0_AUX 257 +#define ACLK_PCIE0_MST 258 +#define ACLK_PCIE0_SLV 259 +#define ACLK_PCIE0_DBI 260 +#define ACLK_USB3OTG1 261 +#define CLK_REF_USB3OTG1 262 +#define CLK_SUSPEND_USB3OTG1 263 +#define ACLK_MMU0 264 +#define ACLK_SLV_MMU0 265 +#define 
ACLK_MMU1 266 +#define ACLK_SLV_MMU1 267 +#define PCLK_PCIE1 268 +#define CLK_PCIE1_AUX 269 +#define ACLK_PCIE1_MST 270 +#define ACLK_PCIE1_SLV 271 +#define ACLK_PCIE1_DBI 272 +#define CLK_RXOOB0 273 +#define CLK_RXOOB1 274 +#define CLK_PMALIVE0 275 +#define CLK_PMALIVE1 276 +#define ACLK_SATA0 277 +#define ACLK_SATA1 278 +#define CLK_USB3OTG1_PIPE_PCLK 279 +#define CLK_USB3OTG1_UTMI 280 +#define CLK_USB3OTG0_PIPE_PCLK 281 +#define CLK_USB3OTG0_UTMI 282 +#define HCLK_SDGMAC_ROOT 283 +#define ACLK_SDGMAC_ROOT 284 +#define PCLK_SDGMAC_ROOT 285 +#define ACLK_GMAC0 286 +#define ACLK_GMAC1 287 +#define PCLK_GMAC0 288 +#define PCLK_GMAC1 289 +#define CCLK_SRC_SDIO 290 +#define HCLK_SDIO 291 +#define CLK_GMAC1_PTP_REF 292 +#define CLK_GMAC0_PTP_REF 293 +#define CLK_GMAC1_PTP_REF_SRC 294 +#define CLK_GMAC0_PTP_REF_SRC 295 +#define CCLK_SRC_SDMMC0 296 +#define HCLK_SDMMC0 297 +#define SCLK_FSPI1_X2 298 +#define HCLK_FSPI1 299 +#define ACLK_DSMC_ROOT 300 +#define ACLK_DSMC 301 +#define PCLK_DSMC 302 +#define CLK_DSMC_SYS 303 +#define HCLK_HSGPIO 304 +#define CLK_HSGPIO_TX 305 +#define CLK_HSGPIO_RX 306 +#define ACLK_HSGPIO 307 +#define PCLK_PHPPHY_ROOT 308 +#define PCLK_PCIE2_COMBOPHY0 309 +#define PCLK_PCIE2_COMBOPHY1 310 +#define CLK_PCIE_100M_SRC 311 +#define CLK_PCIE_100M_NDUTY_SRC 312 +#define CLK_REF_PCIE0_PHY 313 +#define CLK_REF_PCIE1_PHY 314 +#define CLK_REF_MPHY_26M 315 +#define HCLK_RKVDEC_ROOT 316 +#define ACLK_RKVDEC_ROOT 317 +#define HCLK_RKVDEC 318 +#define CLK_RKVDEC_HEVC_CA 319 +#define CLK_RKVDEC_CORE 320 +#define ACLK_UFS_ROOT 321 +#define ACLK_USB_ROOT 322 +#define PCLK_USB_ROOT 323 +#define ACLK_USB3OTG0 324 +#define CLK_REF_USB3OTG0 325 +#define CLK_SUSPEND_USB3OTG0 326 +#define ACLK_MMU2 327 +#define ACLK_SLV_MMU2 328 +#define ACLK_UFS_SYS 329 +#define ACLK_VPU_ROOT 330 +#define ACLK_VPU_MID_ROOT 331 +#define HCLK_VPU_ROOT 332 +#define ACLK_JPEG_ROOT 333 +#define ACLK_VPU_LOW_ROOT 334 +#define HCLK_RGA2E_0 335 +#define ACLK_RGA2E_0 336 +#define CLK_CORE_RGA2E_0 337 +#define ACLK_JPEG 338 +#define HCLK_JPEG 339 +#define HCLK_VDPP 340 +#define ACLK_VDPP 341 +#define CLK_CORE_VDPP 342 +#define HCLK_RGA2E_1 343 +#define ACLK_RGA2E_1 344 +#define CLK_CORE_RGA2E_1 345 +#define DCLK_EBC_FRAC_SRC 346 +#define HCLK_EBC 347 +#define ACLK_EBC 348 +#define DCLK_EBC 349 +#define HCLK_VEPU0_ROOT 350 +#define ACLK_VEPU0_ROOT 351 +#define HCLK_VEPU0 352 +#define ACLK_VEPU0 353 +#define CLK_VEPU0_CORE 354 +#define ACLK_VI_ROOT 355 +#define HCLK_VI_ROOT 356 +#define PCLK_VI_ROOT 357 +#define DCLK_VICAP 358 +#define ACLK_VICAP 359 +#define HCLK_VICAP 360 +#define CLK_ISP_CORE 361 +#define CLK_ISP_CORE_MARVIN 362 +#define CLK_ISP_CORE_VICAP 363 +#define ACLK_ISP 364 +#define HCLK_ISP 365 +#define ACLK_VPSS 366 +#define HCLK_VPSS 367 +#define CLK_CORE_VPSS 368 +#define PCLK_CSI_HOST_0 369 +#define PCLK_CSI_HOST_1 370 +#define PCLK_CSI_HOST_2 371 +#define PCLK_CSI_HOST_3 372 +#define PCLK_CSI_HOST_4 373 +#define ICLK_CSIHOST01 374 +#define ICLK_CSIHOST0 375 +#define CLK_ISP_PVTPLL_SRC 376 +#define ACLK_VI_ROOT_INTER 377 +#define CLK_VICAP_I0CLK 378 +#define CLK_VICAP_I1CLK 379 +#define CLK_VICAP_I2CLK 380 +#define CLK_VICAP_I3CLK 381 +#define CLK_VICAP_I4CLK 382 +#define ACLK_VOP_ROOT 383 +#define HCLK_VOP_ROOT 384 +#define PCLK_VOP_ROOT 385 +#define HCLK_VOP 386 +#define ACLK_VOP 387 +#define DCLK_VP0_SRC 388 +#define DCLK_VP1_SRC 389 +#define DCLK_VP2_SRC 390 +#define DCLK_VP0 391 +#define DCLK_VP1 392 +#define DCLK_VP2 393 +#define PCLK_VOPGRF 394 +#define ACLK_VO0_ROOT 395 +#define 
HCLK_VO0_ROOT 396 +#define PCLK_VO0_ROOT 397 +#define PCLK_VO0_GRF 398 +#define ACLK_HDCP0 399 +#define HCLK_HDCP0 400 +#define PCLK_HDCP0 401 +#define CLK_TRNG0_SKP 402 +#define PCLK_DSIHOST0 403 +#define CLK_DSIHOST0 404 +#define PCLK_HDMITX0 405 +#define CLK_HDMITX0_EARC 406 +#define CLK_HDMITX0_REF 407 +#define PCLK_EDP0 408 +#define CLK_EDP0_24M 409 +#define CLK_EDP0_200M 410 +#define MCLK_SAI5_8CH_SRC 411 +#define MCLK_SAI5_8CH 412 +#define HCLK_SAI5_8CH 413 +#define MCLK_SAI6_8CH_SRC 414 +#define MCLK_SAI6_8CH 415 +#define HCLK_SAI6_8CH 416 +#define HCLK_SPDIF_TX2 417 +#define MCLK_SPDIF_TX2 418 +#define HCLK_SPDIF_RX2 419 +#define MCLK_SPDIF_RX2 420 +#define HCLK_SAI8_8CH 421 +#define MCLK_SAI8_8CH_SRC 422 +#define MCLK_SAI8_8CH 423 +#define ACLK_VO1_ROOT 424 +#define HCLK_VO1_ROOT 425 +#define PCLK_VO1_ROOT 426 +#define MCLK_SAI7_8CH_SRC 427 +#define MCLK_SAI7_8CH 428 +#define HCLK_SAI7_8CH 429 +#define HCLK_SPDIF_TX3 430 +#define HCLK_SPDIF_TX4 431 +#define HCLK_SPDIF_TX5 432 +#define MCLK_SPDIF_TX3 433 +#define CLK_AUX16MHZ_0 434 +#define ACLK_DP0 435 +#define PCLK_DP0 436 +#define PCLK_VO1_GRF 437 +#define ACLK_HDCP1 438 +#define HCLK_HDCP1 439 +#define PCLK_HDCP1 440 +#define CLK_TRNG1_SKP 441 +#define HCLK_SAI9_8CH 442 +#define MCLK_SAI9_8CH_SRC 443 +#define MCLK_SAI9_8CH 444 +#define MCLK_SPDIF_TX4 445 +#define MCLK_SPDIF_TX5 446 +#define CLK_GPU_SRC_PRE 447 +#define CLK_GPU 448 +#define PCLK_GPU_ROOT 449 +#define ACLK_CENTER_ROOT 450 +#define ACLK_CENTER_LOW_ROOT 451 +#define HCLK_CENTER_ROOT 452 +#define PCLK_CENTER_ROOT 453 +#define ACLK_DMA2DDR 454 +#define ACLK_DDR_SHAREMEM 455 +#define PCLK_DMA2DDR 456 +#define PCLK_SHAREMEM 457 +#define HCLK_VEPU1_ROOT 458 +#define ACLK_VEPU1_ROOT 459 +#define HCLK_VEPU1 460 +#define ACLK_VEPU1 461 +#define CLK_VEPU1_CORE 462 +#define CLK_JDBCK_DAP 463 +#define PCLK_MIPI_DCPHY 464 +#define CLK_32K_USB2DEBUG 465 +#define PCLK_CSIDPHY 466 +#define PCLK_USBDPPHY 467 +#define CLK_PMUPHY_REF_SRC 468 +#define CLK_USBDP_COMBO_PHY_IMMORTAL 469 +#define CLK_HDMITXHDP 470 +#define PCLK_MPHY 471 +#define CLK_REF_OSC_MPHY 472 +#define CLK_REF_UFS_CLKOUT 473 +#define HCLK_PMU1_ROOT 474 +#define HCLK_PMU_CM0_ROOT 475 +#define CLK_200M_PMU_SRC 476 +#define CLK_100M_PMU_SRC 477 +#define CLK_50M_PMU_SRC 478 +#define FCLK_PMU_CM0_CORE 479 +#define CLK_PMU_CM0_RTC 480 +#define PCLK_PMU1 481 +#define CLK_PMU1 482 +#define PCLK_PMU1WDT 483 +#define TCLK_PMU1WDT 484 +#define PCLK_PMUTIMER 485 +#define CLK_PMUTIMER_ROOT 486 +#define CLK_PMUTIMER0 487 +#define CLK_PMUTIMER1 488 +#define PCLK_PMU1PWM 489 +#define CLK_PMU1PWM 490 +#define CLK_PMU1PWM_OSC 491 +#define PCLK_PMUPHY_ROOT 492 +#define PCLK_I2C0 493 +#define CLK_I2C0 494 +#define SCLK_UART1 495 +#define PCLK_UART1 496 +#define CLK_PMU1PWM_RC 497 +#define CLK_PDM0 498 +#define HCLK_PDM0 499 +#define MCLK_PDM0 500 +#define HCLK_VAD 501 +#define CLK_OSCCHK_PVTM 502 +#define CLK_PDM0_OUT 503 +#define CLK_HPTIMER_SRC 504 +#define PCLK_PMU0_ROOT 505 +#define PCLK_PMU0 506 +#define PCLK_GPIO0 507 +#define DBCLK_GPIO0 508 +#define CLK_OSC0_PMU1 509 +#define PCLK_PMU1_ROOT 510 +#define XIN_OSC0_DIV 511 +#define ACLK_USB 512 +#define ACLK_UFS 513 +#define ACLK_SDGMAC 514 +#define HCLK_SDGMAC 515 +#define PCLK_SDGMAC 516 +#define HCLK_VO1 517 +#define HCLK_VO0 518 +#define PCLK_CCI_ROOT 519 +#define ACLK_CCI_ROOT 520 +#define HCLK_VO0VOP_CHANNEL 521 +#define ACLK_VO0VOP_CHANNEL 522 +#define ACLK_TOP_MID 523 +#define ACLK_SECURE_HIGH 524 +#define CLK_USBPHY_REF_SRC 525 +#define CLK_PHY_REF_SRC 526 +#define 
CLK_CPLL_REF_SRC 527 +#define CLK_AUPLL_REF_SRC 528 +#define PCLK_SECURE_NS 529 +#define HCLK_SECURE_NS 530 +#define ACLK_SECURE_NS 531 +#define PCLK_OTPC_NS 532 +#define HCLK_CRYPTO_NS 533 +#define HCLK_TRNG_NS 534 +#define CLK_OTPC_NS 535 +#define SCLK_DSU 536 +#define SCLK_DDR 537 +#define ACLK_CRYPTO_NS 538 +#define CLK_PKA_CRYPTO_NS 539 +#define ACLK_RKVDEC_ROOT_BAK 540 +#define CLK_AUDIO_FRAC_0_SRC 541 +#define CLK_AUDIO_FRAC_1_SRC 542 +#define CLK_AUDIO_FRAC_2_SRC 543 +#define CLK_AUDIO_FRAC_3_SRC 544 +#define PCLK_HDPTX_APB 545 + +/* secure clk */ +#define CLK_STIMER0_ROOT 546 +#define CLK_STIMER1_ROOT 547 +#define PCLK_SECURE_S 548 +#define HCLK_SECURE_S 549 +#define ACLK_SECURE_S 550 +#define CLK_PKA_CRYPTO_S 551 +#define HCLK_VO1_S 552 +#define PCLK_VO1_S 553 +#define HCLK_VO0_S 554 +#define PCLK_VO0_S 555 +#define PCLK_KLAD 556 +#define HCLK_CRYPTO_S 557 +#define HCLK_KLAD 558 +#define ACLK_CRYPTO_S 559 +#define HCLK_TRNG_S 560 +#define PCLK_OTPC_S 561 +#define CLK_OTPC_S 562 +#define PCLK_WDT_S 563 +#define TCLK_WDT_S 564 +#define PCLK_HDCP0_TRNG 565 +#define PCLK_HDCP1_TRNG 566 +#define HCLK_HDCP_KEY0 567 +#define HCLK_HDCP_KEY1 568 +#define PCLK_EDP_S 569 +#define ACLK_KLAD 570 + +#endif diff --git a/include/dt-bindings/reset/rockchip,rk3576-cru.h b/include/dt-bindings/reset/rockchip,rk3576-cru.h new file mode 100644 index 000000000000..291fec0ecba0 --- /dev/null +++ b/include/dt-bindings/reset/rockchip,rk3576-cru.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* +* Copyright (c) 2023 Rockchip Electronics Co. Ltd. +* Copyright (c) 2024 Collabora Ltd. +* +* Author: Elaine Zhang +* Author: Detlev Casanova +*/ + +#ifndef _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H +#define _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H + +#define SRST_A_TOP_BIU 0 +#define SRST_P_TOP_BIU 1 +#define SRST_A_TOP_MID_BIU 2 +#define SRST_A_SECURE_HIGH_BIU 3 +#define SRST_H_TOP_BIU 4 + +#define SRST_H_VO0VOP_CHANNEL_BIU 5 +#define SRST_A_VO0VOP_CHANNEL_BIU 6 + +#define SRST_BISRINTF 7 + +#define SRST_H_AUDIO_BIU 8 +#define SRST_H_ASRC_2CH_0 9 +#define SRST_H_ASRC_2CH_1 10 +#define SRST_H_ASRC_4CH_0 11 +#define SRST_H_ASRC_4CH_1 12 +#define SRST_ASRC_2CH_0 13 +#define SRST_ASRC_2CH_1 14 +#define SRST_ASRC_4CH_0 15 +#define SRST_ASRC_4CH_1 16 +#define SRST_M_SAI0_8CH 17 +#define SRST_H_SAI0_8CH 18 +#define SRST_H_SPDIF_RX0 19 +#define SRST_M_SPDIF_RX0 20 + +#define SRST_H_SPDIF_RX1 21 +#define SRST_M_SPDIF_RX1 22 +#define SRST_M_SAI1_8CH 23 +#define SRST_H_SAI1_8CH 24 +#define SRST_M_SAI2_2CH 25 +#define SRST_H_SAI2_2CH 26 +#define SRST_M_SAI3_2CH 27 +#define SRST_H_SAI3_2CH 28 + +#define SRST_M_SAI4_2CH 29 +#define SRST_H_SAI4_2CH 30 +#define SRST_H_ACDCDIG_DSM 31 +#define SRST_M_ACDCDIG_DSM 32 +#define SRST_PDM1 33 +#define SRST_H_PDM1 34 +#define SRST_M_PDM1 35 +#define SRST_H_SPDIF_TX0 36 +#define SRST_M_SPDIF_TX0 37 +#define SRST_H_SPDIF_TX1 38 +#define SRST_M_SPDIF_TX1 39 + +#define SRST_A_BUS_BIU 40 +#define SRST_P_BUS_BIU 41 +#define SRST_P_CRU 42 +#define SRST_H_CAN0 43 +#define SRST_CAN0 44 +#define SRST_H_CAN1 45 +#define SRST_CAN1 46 +#define SRST_P_INTMUX2BUS 47 +#define SRST_P_VCCIO_IOC 48 +#define SRST_H_BUS_BIU 49 +#define SRST_KEY_SHIFT 50 + +#define SRST_P_I2C1 51 +#define SRST_P_I2C2 52 +#define SRST_P_I2C3 53 +#define SRST_P_I2C4 54 +#define SRST_P_I2C5 55 +#define SRST_P_I2C6 56 +#define SRST_P_I2C7 57 +#define SRST_P_I2C8 58 +#define SRST_P_I2C9 59 +#define SRST_P_WDT_BUSMCU 60 +#define SRST_T_WDT_BUSMCU 61 +#define SRST_A_GIC 62 +#define SRST_I2C1 63 +#define SRST_I2C2 64 
+#define SRST_I2C3 65 +#define SRST_I2C4 66 + +#define SRST_I2C5 67 +#define SRST_I2C6 68 +#define SRST_I2C7 69 +#define SRST_I2C8 70 +#define SRST_I2C9 71 +#define SRST_P_SARADC 72 +#define SRST_SARADC 73 +#define SRST_P_TSADC 74 +#define SRST_TSADC 75 +#define SRST_P_UART0 76 +#define SRST_P_UART2 77 +#define SRST_P_UART3 78 +#define SRST_P_UART4 79 +#define SRST_P_UART5 80 +#define SRST_P_UART6 81 + +#define SRST_P_UART7 82 +#define SRST_P_UART8 83 +#define SRST_P_UART9 84 +#define SRST_P_UART10 85 +#define SRST_P_UART11 86 +#define SRST_S_UART0 87 +#define SRST_S_UART2 88 +#define SRST_S_UART3 89 +#define SRST_S_UART4 90 +#define SRST_S_UART5 91 + +#define SRST_S_UART6 92 +#define SRST_S_UART7 93 +#define SRST_S_UART8 94 +#define SRST_S_UART9 95 +#define SRST_S_UART10 96 +#define SRST_S_UART11 97 +#define SRST_P_SPI0 98 +#define SRST_P_SPI1 99 +#define SRST_P_SPI2 100 + +#define SRST_P_SPI3 101 +#define SRST_P_SPI4 102 +#define SRST_SPI0 103 +#define SRST_SPI1 104 +#define SRST_SPI2 105 +#define SRST_SPI3 106 +#define SRST_SPI4 107 +#define SRST_P_WDT0 108 +#define SRST_T_WDT0 109 +#define SRST_P_SYS_GRF 110 +#define SRST_P_PWM1 111 +#define SRST_PWM1 112 + +#define SRST_P_BUSTIMER0 113 +#define SRST_P_BUSTIMER1 114 +#define SRST_TIMER0 115 +#define SRST_TIMER1 116 +#define SRST_TIMER2 117 +#define SRST_TIMER3 118 +#define SRST_TIMER4 119 +#define SRST_TIMER5 120 +#define SRST_P_BUSIOC 121 +#define SRST_P_MAILBOX0 122 +#define SRST_P_GPIO1 123 + +#define SRST_GPIO1 124 +#define SRST_P_GPIO2 125 +#define SRST_GPIO2 126 +#define SRST_P_GPIO3 127 +#define SRST_GPIO3 128 +#define SRST_P_GPIO4 129 +#define SRST_GPIO4 130 +#define SRST_A_DECOM 131 +#define SRST_P_DECOM 132 +#define SRST_D_DECOM 133 +#define SRST_TIMER6 134 +#define SRST_TIMER7 135 +#define SRST_TIMER8 136 +#define SRST_TIMER9 137 +#define SRST_TIMER10 138 + +#define SRST_TIMER11 139 +#define SRST_A_DMAC0 140 +#define SRST_A_DMAC1 141 +#define SRST_A_DMAC2 142 +#define SRST_A_SPINLOCK 143 +#define SRST_REF_PVTPLL_BUS 144 +#define SRST_H_I3C0 145 +#define SRST_H_I3C1 146 +#define SRST_H_BUS_CM0_BIU 147 +#define SRST_F_BUS_CM0_CORE 148 +#define SRST_T_BUS_CM0_JTAG 149 + +#define SRST_P_INTMUX2PMU 150 +#define SRST_P_INTMUX2DDR 151 +#define SRST_P_PVTPLL_BUS 152 +#define SRST_P_PWM2 153 +#define SRST_PWM2 154 +#define SRST_FREQ_PWM1 155 +#define SRST_COUNTER_PWM1 156 +#define SRST_I3C0 157 +#define SRST_I3C1 158 + +#define SRST_P_DDR_MON_CH0 159 +#define SRST_P_DDR_BIU 160 +#define SRST_P_DDR_UPCTL_CH0 161 +#define SRST_TM_DDR_MON_CH0 162 +#define SRST_A_DDR_BIU 163 +#define SRST_DFI_CH0 164 +#define SRST_DDR_MON_CH0 165 +#define SRST_P_DDR_HWLP_CH0 166 +#define SRST_P_DDR_MON_CH1 167 +#define SRST_P_DDR_HWLP_CH1 168 + +#define SRST_P_DDR_UPCTL_CH1 169 +#define SRST_TM_DDR_MON_CH1 170 +#define SRST_DFI_CH1 171 +#define SRST_A_DDR01_MSCH0 172 +#define SRST_A_DDR01_MSCH1 173 +#define SRST_DDR_MON_CH1 174 +#define SRST_DDR_SCRAMBLE_CH0 175 +#define SRST_DDR_SCRAMBLE_CH1 176 +#define SRST_P_AHB2APB 177 +#define SRST_H_AHB2APB 178 +#define SRST_H_DDR_BIU 179 +#define SRST_F_DDR_CM0_CORE 180 + +#define SRST_P_DDR01_MSCH0 181 +#define SRST_P_DDR01_MSCH1 182 +#define SRST_DDR_TIMER0 183 +#define SRST_DDR_TIMER1 184 +#define SRST_T_WDT_DDR 185 +#define SRST_P_WDT 186 +#define SRST_P_TIMER 187 +#define SRST_T_DDR_CM0_JTAG 188 +#define SRST_P_DDR_GRF 189 + +#define SRST_DDR_UPCTL_CH0 190 +#define SRST_A_DDR_UPCTL_0_CH0 191 +#define SRST_A_DDR_UPCTL_1_CH0 192 +#define SRST_A_DDR_UPCTL_2_CH0 193 +#define SRST_A_DDR_UPCTL_3_CH0 194 +#define 
SRST_A_DDR_UPCTL_4_CH0 195 + +#define SRST_DDR_UPCTL_CH1 196 +#define SRST_A_DDR_UPCTL_0_CH1 197 +#define SRST_A_DDR_UPCTL_1_CH1 198 +#define SRST_A_DDR_UPCTL_2_CH1 199 +#define SRST_A_DDR_UPCTL_3_CH1 200 +#define SRST_A_DDR_UPCTL_4_CH1 201 + +#define SRST_REF_PVTPLL_DDR 202 +#define SRST_P_PVTPLL_DDR 203 + +#define SRST_A_RKNN0 204 +#define SRST_A_RKNN0_BIU 205 +#define SRST_L_RKNN0_BIU 206 + +#define SRST_A_RKNN1 207 +#define SRST_A_RKNN1_BIU 208 +#define SRST_L_RKNN1_BIU 209 + +#define SRST_NPU_DAP 210 +#define SRST_L_NPUSUBSYS_BIU 211 +#define SRST_P_NPUTOP_BIU 212 +#define SRST_P_NPU_TIMER 213 +#define SRST_NPUTIMER0 214 +#define SRST_NPUTIMER1 215 +#define SRST_P_NPU_WDT 216 +#define SRST_T_NPU_WDT 217 + +#define SRST_A_RKNN_CBUF 218 +#define SRST_A_RVCORE0 219 +#define SRST_P_NPU_GRF 220 +#define SRST_P_PVTPLL_NPU 221 +#define SRST_NPU_PVTPLL 222 +#define SRST_H_NPU_CM0_BIU 223 +#define SRST_F_NPU_CM0_CORE 224 +#define SRST_T_NPU_CM0_JTAG 225 +#define SRST_A_RKNNTOP_BIU 226 +#define SRST_H_RKNN_CBUF 227 +#define SRST_H_RKNNTOP_BIU 228 + +#define SRST_H_NVM_BIU 229 +#define SRST_A_NVM_BIU 230 +#define SRST_S_FSPI 231 +#define SRST_H_FSPI 232 +#define SRST_C_EMMC 233 +#define SRST_H_EMMC 234 +#define SRST_A_EMMC 235 +#define SRST_B_EMMC 236 +#define SRST_T_EMMC 237 + +#define SRST_P_GRF 238 +#define SRST_P_PHP_BIU 239 +#define SRST_A_PHP_BIU 240 +#define SRST_P_PCIE0 241 +#define SRST_PCIE0_POWER_UP 242 + +#define SRST_A_USB3OTG1 243 +#define SRST_A_MMU0 244 +#define SRST_A_SLV_MMU0 245 +#define SRST_A_MMU1 246 + +#define SRST_A_SLV_MMU1 247 +#define SRST_P_PCIE1 248 +#define SRST_PCIE1_POWER_UP 249 + +#define SRST_RXOOB0 250 +#define SRST_RXOOB1 251 +#define SRST_PMALIVE0 252 +#define SRST_PMALIVE1 253 +#define SRST_A_SATA0 254 +#define SRST_A_SATA1 255 +#define SRST_ASIC1 256 +#define SRST_ASIC0 257 + +#define SRST_P_CSIDPHY1 258 +#define SRST_SCAN_CSIDPHY1 259 + +#define SRST_P_SDGMAC_GRF 260 +#define SRST_P_SDGMAC_BIU 261 +#define SRST_A_SDGMAC_BIU 262 +#define SRST_H_SDGMAC_BIU 263 +#define SRST_A_GMAC0 264 +#define SRST_A_GMAC1 265 +#define SRST_P_GMAC0 266 +#define SRST_P_GMAC1 267 +#define SRST_H_SDIO 268 + +#define SRST_H_SDMMC0 269 +#define SRST_S_FSPI1 270 +#define SRST_H_FSPI1 271 +#define SRST_A_DSMC_BIU 272 +#define SRST_A_DSMC 273 +#define SRST_P_DSMC 274 +#define SRST_H_HSGPIO 275 +#define SRST_HSGPIO 276 +#define SRST_A_HSGPIO 277 + +#define SRST_H_RKVDEC 278 +#define SRST_H_RKVDEC_BIU 279 +#define SRST_A_RKVDEC_BIU 280 +#define SRST_RKVDEC_HEVC_CA 281 +#define SRST_RKVDEC_CORE 282 + +#define SRST_A_USB_BIU 283 +#define SRST_P_USBUFS_BIU 284 +#define SRST_A_USB3OTG0 285 +#define SRST_A_UFS_BIU 286 +#define SRST_A_MMU2 287 +#define SRST_A_SLV_MMU2 288 +#define SRST_A_UFS_SYS 289 + +#define SRST_A_UFS 290 +#define SRST_P_USBUFS_GRF 291 +#define SRST_P_UFS_GRF 292 + +#define SRST_H_VPU_BIU 293 +#define SRST_A_JPEG_BIU 294 +#define SRST_A_RGA_BIU 295 +#define SRST_A_VDPP_BIU 296 +#define SRST_A_EBC_BIU 297 +#define SRST_H_RGA2E_0 298 +#define SRST_A_RGA2E_0 299 +#define SRST_CORE_RGA2E_0 300 + +#define SRST_A_JPEG 301 +#define SRST_H_JPEG 302 +#define SRST_H_VDPP 303 +#define SRST_A_VDPP 304 +#define SRST_CORE_VDPP 305 +#define SRST_H_RGA2E_1 306 +#define SRST_A_RGA2E_1 307 +#define SRST_CORE_RGA2E_1 308 +#define SRST_H_EBC 309 +#define SRST_A_EBC 310 +#define SRST_D_EBC 311 + +#define SRST_H_VEPU0_BIU 312 +#define SRST_A_VEPU0_BIU 313 +#define SRST_H_VEPU0 314 +#define SRST_A_VEPU0 315 +#define SRST_VEPU0_CORE 316 + +#define SRST_A_VI_BIU 317 +#define SRST_H_VI_BIU 318 
+#define SRST_P_VI_BIU 319 +#define SRST_D_VICAP 320 +#define SRST_A_VICAP 321 +#define SRST_H_VICAP 322 +#define SRST_ISP0 323 +#define SRST_ISP0_VICAP 324 + +#define SRST_CORE_VPSS 325 +#define SRST_P_CSI_HOST_0 326 +#define SRST_P_CSI_HOST_1 327 +#define SRST_P_CSI_HOST_2 328 +#define SRST_P_CSI_HOST_3 329 +#define SRST_P_CSI_HOST_4 330 + +#define SRST_CIFIN 331 +#define SRST_VICAP_I0CLK 332 +#define SRST_VICAP_I1CLK 333 +#define SRST_VICAP_I2CLK 334 +#define SRST_VICAP_I3CLK 335 +#define SRST_VICAP_I4CLK 336 + +#define SRST_A_VOP_BIU 337 +#define SRST_A_VOP2_BIU 338 +#define SRST_H_VOP_BIU 339 +#define SRST_P_VOP_BIU 340 +#define SRST_H_VOP 341 +#define SRST_A_VOP 342 +#define SRST_D_VP0 343 + +#define SRST_D_VP1 344 +#define SRST_D_VP2 345 +#define SRST_P_VOP2_BIU 346 +#define SRST_P_VOPGRF 347 + +#define SRST_H_VO0_BIU 348 +#define SRST_P_VO0_BIU 349 +#define SRST_A_HDCP0_BIU 350 +#define SRST_P_VO0_GRF 351 +#define SRST_A_HDCP0 352 +#define SRST_H_HDCP0 353 +#define SRST_HDCP0 354 + +#define SRST_P_DSIHOST0 355 +#define SRST_DSIHOST0 356 +#define SRST_P_HDMITX0 357 +#define SRST_HDMITX0_REF 358 +#define SRST_P_EDP0 359 +#define SRST_EDP0_24M 360 + +#define SRST_M_SAI5_8CH 361 +#define SRST_H_SAI5_8CH 362 +#define SRST_M_SAI6_8CH 363 +#define SRST_H_SAI6_8CH 364 +#define SRST_H_SPDIF_TX2 365 +#define SRST_M_SPDIF_TX2 366 +#define SRST_H_SPDIF_RX2 367 +#define SRST_M_SPDIF_RX2 368 + +#define SRST_H_SAI8_8CH 369 +#define SRST_M_SAI8_8CH 370 + +#define SRST_H_VO1_BIU 371 +#define SRST_P_VO1_BIU 372 +#define SRST_M_SAI7_8CH 373 +#define SRST_H_SAI7_8CH 374 +#define SRST_H_SPDIF_TX3 375 +#define SRST_H_SPDIF_TX4 376 +#define SRST_H_SPDIF_TX5 377 +#define SRST_M_SPDIF_TX3 378 + +#define SRST_DP0 379 +#define SRST_P_VO1_GRF 380 +#define SRST_A_HDCP1_BIU 381 +#define SRST_A_HDCP1 382 +#define SRST_H_HDCP1 383 +#define SRST_HDCP1 384 +#define SRST_H_SAI9_8CH 385 +#define SRST_M_SAI9_8CH 386 +#define SRST_M_SPDIF_TX4 387 +#define SRST_M_SPDIF_TX5 388 + +#define SRST_GPU 389 +#define SRST_A_S_GPU_BIU 390 +#define SRST_A_M0_GPU_BIU 391 +#define SRST_P_GPU_BIU 392 +#define SRST_P_GPU_GRF 393 +#define SRST_GPU_PVTPLL 394 +#define SRST_P_PVTPLL_GPU 395 + +#define SRST_A_CENTER_BIU 396 +#define SRST_A_DMA2DDR 397 +#define SRST_A_DDR_SHAREMEM 398 +#define SRST_A_DDR_SHAREMEM_BIU 399 +#define SRST_H_CENTER_BIU 400 +#define SRST_P_CENTER_GRF 401 +#define SRST_P_DMA2DDR 402 +#define SRST_P_SHAREMEM 403 +#define SRST_P_CENTER_BIU 404 + +#define SRST_LINKSYM_HDMITXPHY0 405 + +#define SRST_DP0_PIXELCLK 406 +#define SRST_PHY_DP0_TX 407 +#define SRST_DP1_PIXELCLK 408 +#define SRST_DP2_PIXELCLK 409 + +#define SRST_H_VEPU1_BIU 410 +#define SRST_A_VEPU1_BIU 411 +#define SRST_H_VEPU1 412 +#define SRST_A_VEPU1 413 +#define SRST_VEPU1_CORE 414 + +#define SRST_P_PHPPHY_CRU 415 +#define SRST_P_APB2ASB_SLV_CHIP_TOP 416 +#define SRST_P_PCIE2_COMBOPHY0 417 +#define SRST_P_PCIE2_COMBOPHY0_GRF 418 +#define SRST_P_PCIE2_COMBOPHY1 419 +#define SRST_P_PCIE2_COMBOPHY1_GRF 420 + +#define SRST_PCIE0_PIPE_PHY 421 +#define SRST_PCIE1_PIPE_PHY 422 + +#define SRST_H_CRYPTO_NS 423 +#define SRST_H_TRNG_NS 424 +#define SRST_P_OTPC_NS 425 +#define SRST_OTPC_NS 426 + +#define SRST_P_HDPTX_GRF 427 +#define SRST_P_HDPTX_APB 428 +#define SRST_P_MIPI_DCPHY 429 +#define SRST_P_DCPHY_GRF 430 +#define SRST_P_BOT0_APB2ASB 431 +#define SRST_P_BOT1_APB2ASB 432 +#define SRST_USB2DEBUG 433 +#define SRST_P_CSIPHY_GRF 434 +#define SRST_P_CSIPHY 435 +#define SRST_P_USBPHY_GRF_0 436 +#define SRST_P_USBPHY_GRF_1 437 +#define SRST_P_USBDP_GRF 438 
+#define SRST_P_USBDPPHY 439 +#define SRST_USBDP_COMBO_PHY_INIT 440 + +#define SRST_USBDP_COMBO_PHY_CMN 441 +#define SRST_USBDP_COMBO_PHY_LANE 442 +#define SRST_USBDP_COMBO_PHY_PCS 443 +#define SRST_M_MIPI_DCPHY 444 +#define SRST_S_MIPI_DCPHY 445 +#define SRST_SCAN_CSIPHY 446 +#define SRST_P_VCCIO6_IOC 447 +#define SRST_OTGPHY_0 448 +#define SRST_OTGPHY_1 449 +#define SRST_HDPTX_INIT 450 +#define SRST_HDPTX_CMN 451 +#define SRST_HDPTX_LANE 452 +#define SRST_HDMITXHDP 453 + +#define SRST_MPHY_INIT 454 +#define SRST_P_MPHY_GRF 455 +#define SRST_P_VCCIO7_IOC 456 + +#define SRST_H_PMU1_BIU 457 +#define SRST_P_PMU1_NIU 458 +#define SRST_H_PMU_CM0_BIU 459 +#define SRST_PMU_CM0_CORE 460 +#define SRST_PMU_CM0_JTAG 461 + +#define SRST_P_CRU_PMU1 462 +#define SRST_P_PMU1_GRF 463 +#define SRST_P_PMU1_IOC 464 +#define SRST_P_PMU1WDT 465 +#define SRST_T_PMU1WDT 466 +#define SRST_P_PMUTIMER 467 +#define SRST_PMUTIMER0 468 +#define SRST_PMUTIMER1 469 +#define SRST_P_PMU1PWM 470 +#define SRST_PMU1PWM 471 + +#define SRST_P_I2C0 472 +#define SRST_I2C0 473 +#define SRST_S_UART1 474 +#define SRST_P_UART1 475 +#define SRST_PDM0 476 +#define SRST_H_PDM0 477 + +#define SRST_M_PDM0 478 +#define SRST_H_VAD 479 + +#define SRST_P_PMU0GRF 480 +#define SRST_P_PMU0IOC 481 +#define SRST_P_GPIO0 482 +#define SRST_DB_GPIO0 483 + +#endif -- cgit v1.2.3 From b31c9d9dc343146b9f4ce67b4eee748c49296e99 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Tue, 27 Aug 2024 17:19:29 +0900 Subject: HID: hidraw: add HIDIOCREVOKE ioctl There is a need for userspace applications to open HID devices directly. Use-cases include configuration of gaming mice or direct access to joystick devices. The latter is currently handled by the uaccess tag in systemd, other devices include more custom/local configurations or just sudo. A better approach is what we already have for evdev devices: give the application a file descriptor and revoke it when it may no longer access that device. This patch is the hidraw equivalent to the EVIOCREVOKE ioctl, see commit c7dc65737c9a ("Input: evdev - add EVIOCREVOKE ioctl") for full details. 
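For illustration, a userspace manager could drive the new ioctl roughly as follows. This is a minimal sketch: the device path, the NULL argument convention, and the fallback define are assumptions until the updated uapi header is in use.

#include <stddef.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

#ifndef HIDIOCREVOKE
#define HIDIOCREVOKE _IOW('H', 0x0D, int)	/* mirrors the uapi addition below */
#endif

/* Manager side: open the node, hand a dup'd fd to the client, keep this one. */
static int open_and_hand_out(const char *node)
{
	return open(node, O_RDWR);
}

/* Later, when the client must lose access (session switch, policy change). */
static int revoke_access(int client_fd)
{
	return ioctl(client_fd, HIDIOCREVOKE, NULL);
}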
An MR for systemd-logind has been filed here: https://github.com/systemd/systemd/pull/33970 Signed-off-by: Peter Hutterer Link: https://patch.msgid.link/20240827-hidraw-revoke-v5-1-d004a7451aea@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hidraw.h | 1 + include/uapi/linux/hidraw.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h index cd67f4ca5599..18fd30a288de 100644 --- a/include/linux/hidraw.h +++ b/include/linux/hidraw.h @@ -32,6 +32,7 @@ struct hidraw_list { struct hidraw *hidraw; struct list_head node; struct mutex read_mutex; + bool revoked; }; #ifdef CONFIG_HIDRAW diff --git a/include/uapi/linux/hidraw.h b/include/uapi/linux/hidraw.h index 33ebad81720a..d5ee269864e0 100644 --- a/include/uapi/linux/hidraw.h +++ b/include/uapi/linux/hidraw.h @@ -46,6 +46,7 @@ struct hidraw_devinfo { /* The first byte of SOUTPUT and GOUTPUT is the report number */ #define HIDIOCSOUTPUT(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0B, len) #define HIDIOCGOUTPUT(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0C, len) +#define HIDIOCREVOKE _IOW('H', 0x0D, int) /* Revoke device access */ #define HIDRAW_FIRST_MINOR 0 #define HIDRAW_MAX_DEVICES 64 -- cgit v1.2.3 From b35243a447b9fe6457fa8e1352152b818436ba5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Aug 2024 19:37:54 +0200 Subject: block: rework bio splitting The current setup with bio_may_exceed_limit and __bio_split_to_limits is a bit of a mess. Change it so that __bio_split_to_limits does all the work and is just a variant of bio_split_to_limits that returns nr_segs. This is done by inlining it and instead have the various bio_split_* helpers directly submit the potentially split bios. To support btrfs, the rw version has a lower level helper split out that just returns the offset to split. This turns out to nicely clean up the btrfs flow as well. Signed-off-by: Christoph Hellwig Acked-by: David Sterba Reviewed-by: Damien Le Moal Tested-by: Hans Holmberg Reviewed-by: Hans Holmberg Link: https://lore.kernel.org/r/20240826173820.1690925-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index a46e2047bea4..faceadb040f9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,8 +324,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); -struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, - unsigned *segs, struct bio_set *bs, unsigned max_bytes); +int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes); /** * bio_next_split - get next @sectors from a bio, splitting if necessary -- cgit v1.2.3 From 379b122a3ec8033aa43cb70e8ecb6fb7f98aa68f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Aug 2024 19:37:55 +0200 Subject: block: constify the lim argument to queue_limits_max_zone_append_sectors queue_limits_max_zone_append_sectors doesn't change the lim argument, so mark it as const. 
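A hypothetical read-only caller then compiles without a cast; this sketch only illustrates the const-correctness, the helper name is the one changed in the hunk below.

#include <linux/blkdev.h>

/* Hypothetical helper: purely read-only inspection of the limits. */
static bool can_zone_append(const struct queue_limits *lim, unsigned int nr_sects)
{
	return nr_sects <= queue_limits_max_zone_append_sectors(lim);
}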
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Hans Holmberg Reviewed-by: Hans Holmberg Link: https://lore.kernel.org/r/20240826173820.1690925-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..ec3ea5d1f99d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1187,7 +1187,8 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l) +static inline unsigned int +queue_limits_max_zone_append_sectors(const struct queue_limits *l) { unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); -- cgit v1.2.3 From 2e1a57d5b0adbb5bd1d85245ec29b6d3cc7edc69 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 21 Aug 2024 16:54:52 -0500 Subject: mfd: axp20x: Add ADC, BAT, and USB cells for AXP717 Add support for the AXP717 PMIC to utilize the ADC (for reading voltage, current, and temperature information from the PMIC) as well as the USB charger and battery. Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20240821215456.962564-12-macroalpha82@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 8c0a33a2e9ce..9b7ef473adcb 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -115,6 +115,16 @@ enum axp20x_variants { #define AXP313A_IRQ_STATE 0x21 #define AXP717_ON_INDICATE 0x00 +#define AXP717_PMU_STATUS_2 0x01 +#define AXP717_BC_DETECT 0x05 +#define AXP717_PMU_FAULT 0x08 +#define AXP717_MODULE_EN_CONTROL_1 0x0b +#define AXP717_MIN_SYS_V_CONTROL 0x15 +#define AXP717_INPUT_VOL_LIMIT_CTRL 0x16 +#define AXP717_INPUT_CUR_LIMIT_CTRL 0x17 +#define AXP717_MODULE_EN_CONTROL_2 0x19 +#define AXP717_BOOST_CONTROL 0x1e +#define AXP717_VSYS_V_POWEROFF 0x24 #define AXP717_IRQ0_EN 0x40 #define AXP717_IRQ1_EN 0x41 #define AXP717_IRQ2_EN 0x42 @@ -125,6 +135,9 @@ enum axp20x_variants { #define AXP717_IRQ2_STATE 0x4a #define AXP717_IRQ3_STATE 0x4b #define AXP717_IRQ4_STATE 0x4c +#define AXP717_ICC_CHG_SET 0x62 +#define AXP717_ITERM_CHG_SET 0x63 +#define AXP717_CV_CHG_SET 0x64 #define AXP717_DCDC_OUTPUT_CONTROL 0x80 #define AXP717_DCDC1_CONTROL 0x83 #define AXP717_DCDC2_CONTROL 0x84 @@ -145,6 +158,19 @@ enum axp20x_variants { #define AXP717_CLDO3_CONTROL 0x9d #define AXP717_CLDO4_CONTROL 0x9e #define AXP717_CPUSLDO_CONTROL 0x9f +#define AXP717_BATT_PERCENT_DATA 0xa4 +#define AXP717_ADC_CH_EN_CONTROL 0xc0 +#define AXP717_BATT_V_H 0xc4 +#define AXP717_BATT_V_L 0xc5 +#define AXP717_VBUS_V_H 0xc6 +#define AXP717_VBUS_V_L 0xc7 +#define AXP717_VSYS_V_H 0xc8 +#define AXP717_VSYS_V_L 0xc9 +#define AXP717_BATT_CHRG_I_H 0xca +#define AXP717_BATT_CHRG_I_L 0xcb +#define AXP717_ADC_DATA_SEL 0xcd +#define AXP717_ADC_DATA_H 0xce +#define AXP717_ADC_DATA_L 0xcf #define AXP806_STARTUP_SRC 0x00 #define AXP806_CHIP_ID 0x03 -- cgit v1.2.3 From 36b531aace379f45e4ca763f7efb8d0295216e83 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Tue, 27 Aug 2024 02:58:20 +0000 Subject: ACPICA: Detect FACS in reduced hardware build According to Section 5.2.10 of ACPI Specification, FACS is optional in reduced hardware model. 
Enable the detection for "Hardware-reduced ACPI support only" build (CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y) also. Link: https://github.com/acpica/acpica/commit/ee53ed6b5452612bb44af542b68d605f8b2b1104 Signed-off-by: Jiaqing Zhao Link: https://patch.msgid.link/20240827025821.2099068-2-jiaqing.zhao@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acconfig.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index d768d9c568cf..2da5f4a6e814 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -67,7 +67,6 @@ * General Purpose Events (GPEs) * Global Lock * ACPI PM timer - * FACS table (Waking vectors and Global Lock) */ #ifndef ACPI_REDUCED_HARDWARE #define ACPI_REDUCED_HARDWARE FALSE -- cgit v1.2.3 From 36b5c1dc4b22913f9813a94350e24f6744db38a8 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Tue, 27 Aug 2024 02:58:21 +0000 Subject: ACPICA: Allow setting waking vector on reduced hardware platforms Allow setting waking vector in FACS table on reduced hardware platforms to support S3 wakeup. Link: https://github.com/acpica/acpica/commit/ee53ed6b5452612bb44af542b68d605f8b2b1104 Signed-off-by: Jiaqing Zhao Link: https://patch.msgid.link/20240827025821.2099068-3-jiaqing.zhao@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..99bb45a46600 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -881,10 +881,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state_prep(u8 sleep_state)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_set_firmware_waking_vector - (acpi_physical_address physical_address, - acpi_physical_address physical_address64)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_set_firmware_waking_vector + (acpi_physical_address physical_address, + acpi_physical_address physical_address64)) /* * ACPI Timer interfaces */ -- cgit v1.2.3 From c0390d541128e8820af8177a572d9d87ff68a3bb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 21:06:58 +0200 Subject: fs: pack struct file Now that we shrunk struct file to 192 bytes aka 3 cachelines reorder struct file to not leave any holes or have members cross cachelines. Add a short comment to each of the fields and mark the cachelines. It's possible that we may have to tweak this based on profiling in the future. So far I had Jens test this comparing io_uring with non-fixed and fixed files and it improved performance. The layout is a combination of Jens' and my changes. Link: https: //lore.kernel.org/r/20240824-peinigen-hocken-7384b977c643@brauner Signed-off-by: Christian Brauner --- include/linux/fs.h | 91 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 095a956aeb29..af8bbd4eeb3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -987,52 +987,63 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -/* - * f_{lock,count,pos_lock} members can be highly contended and share - * the same cacheline. f_{lock,mode} are very frequently used together - * and so share the same cacheline as well. 
The read-mostly - * f_{path,inode,op} are kept on a separate cacheline. +/** + * struct file - Represents a file + * @f_count: reference count + * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. + * @f_mode: FMODE_* flags often used in hotpaths + * @f_op: file operations + * @f_mapping: Contents of a cacheable, mappable object. + * @private_data: filesystem or driver specific data + * @f_inode: cached inode + * @f_flags: file flags + * @f_iocb_flags: iocb flags + * @f_cred: stashed credentials of creator/opener + * @f_path: path of the file + * @f_pos_lock: lock protecting file position + * @f_pos: file position + * @f_version: file version + * @f_security: LSM security context of this file + * @f_owner: file owner + * @f_wb_err: writeback error + * @f_sb_err: per sb writeback errors + * @f_ep: link of all epoll hooks for this file + * @f_task_work: task work entry point + * @f_llist: work queue entrypoint + * @f_ra: file's readahead state */ struct file { - union { - /* fput() uses task work when closing and freeing file (default). */ - struct callback_head f_task_work; - /* fput() must use workqueue (most kernel threads). */ - struct llist_node f_llist; - /* Invalid after last fput(). */ - struct file_ra_state f_ra; - }; - /* - * Protects f_ep, f_flags. - * Must not be taken from IRQ context. - */ - spinlock_t f_lock; - fmode_t f_mode; - atomic_long_t f_count; - struct mutex f_pos_lock; - loff_t f_pos; - unsigned int f_flags; - unsigned int f_iocb_flags; - struct fown_struct *f_owner; - const struct cred *f_cred; - struct path f_path; - struct inode *f_inode; /* cached value */ + atomic_long_t f_count; + spinlock_t f_lock; + fmode_t f_mode; const struct file_operations *f_op; - - u64 f_version; + struct address_space *f_mapping; + void *private_data; + struct inode *f_inode; + unsigned int f_flags; + unsigned int f_iocb_flags; + const struct cred *f_cred; + /* --- cacheline 1 boundary (64 bytes) --- */ + struct path f_path; + struct mutex f_pos_lock; + loff_t f_pos; + u64 f_version; + /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY - void *f_security; + void *f_security; #endif - /* needed for tty driver, and maybe others */ - void *private_data; - + struct fown_struct *f_owner; + errseq_t f_wb_err; + errseq_t f_sb_err; #ifdef CONFIG_EPOLL - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head *f_ep; -#endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; - errseq_t f_wb_err; - errseq_t f_sb_err; /* for syncfs */ + struct hlist_head *f_ep; +#endif + union { + struct callback_head f_task_work; + struct llist_node f_llist; + struct file_ra_state f_ra; + }; + /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ -- cgit v1.2.3 From d345bd2e9834e2da505977e154a1c179c793b7b2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 28 Aug 2024 12:56:24 +0200 Subject: mm: add kmem_cache_create_rcu() When a kmem cache is created with SLAB_TYPESAFE_BY_RCU the free pointer must be located outside of the object because we don't know what part of the memory can safely be overwritten as it may be needed to prevent object recycling. That has the consequence that SLAB_TYPESAFE_BY_RCU may end up adding a new cacheline. This is the case for e.g., struct file. 
After having it shrunk down by 40 bytes and having it fit in three cachelines we still have SLAB_TYPESAFE_BY_RCU adding a fourth cacheline because it needs to accommodate the free pointer. Add a new kmem_cache_create_rcu() function that allows the caller to specify an offset where the free pointer is supposed to be placed. Link: https://lore.kernel.org/r/20240828-work-kmem_cache-rcu-v3-2-5460bc1f09f6@kernel.org Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner --- include/linux/slab.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..5b2da2cf31a8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -212,6 +212,12 @@ enum _slab_flag_bits { #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED #endif +/* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ +typedef struct { unsigned long v; } freeptr_t; + /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -242,6 +248,9 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, + unsigned int freeptr_offset, + slab_flags_t flags); void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); -- cgit v1.2.3 From ea566e18b4deea6e998088de4f1a76d1f39c8d3f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 28 Aug 2024 12:56:25 +0200 Subject: fs: use kmem_cache_create_rcu() Switch to the new kmem_cache_create_rcu() helper which allows us to use a custom free pointer offset avoiding the need to have an external free pointer which would grow struct file behind our backs. Link: https://lore.kernel.org/r/20240828-work-kmem_cache-rcu-v3-3-5460bc1f09f6@kernel.org Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index af8bbd4eeb3a..58c91a52cad1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1011,6 +1011,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_task_work: task work entry point * @f_llist: work queue entrypoint * @f_ra: file's readahead state + * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { atomic_long_t f_count; @@ -1042,6 +1043,7 @@ struct file { struct callback_head f_task_work; struct llist_node f_llist; struct file_ra_state f_ra; + freeptr_t f_freeptr; }; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout -- cgit v1.2.3 From ae98dbf43d755b4e111fcd086e53939bef3e9a1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Aug 2024 11:20:45 -0600 Subject: io_uring/kbuf: add support for incremental buffer consumption By default, any recv/read operation that uses provided buffers will consume at least 1 buffer fully (and maybe more, in case of bundles). This adds support for incremental consumption, meaning that an application may add large buffers, and each read/recv will just consume the part of the buffer that it needs. For example, let's say an application registers 1MB buffers in a provided buffer ring, for streaming receives. 
If it gets a short recv, then the full 1MB buffer will be consumed and passed back to the application. With incremental consumption, only the part that was actually used is consumed, and the buffer remains the current one. This means that both the application and the kernel needs to keep track of what the current receive point is. Each recv will still pass back a buffer ID and the size consumed, the only difference is that before the next receive would always be the next buffer in the ring. Now the same buffer ID may return multiple receives, each at an offset into that buffer from where the previous receive left off. Example: Application registers a provided buffer ring, and adds two 32K buffers to the ring. Buffer1 address: 0x1000000 (buffer ID 0) Buffer2 address: 0x2000000 (buffer ID 1) A recv completion is received with the following values: cqe->res 0x1000 (4k bytes received) cqe->flags 0x11 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 0) and the application now knows that 4096b of data is available at 0x1000000, the start of that buffer, and that more data from this buffer will be coming. Now the next receive comes in: cqe->res 0x2010 (8k bytes received) cqe->flags 0x11 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 0) which tells the application that 8k is available where the last completion left off, at 0x1001000. Next completion is: cqe->res 0x5000 (20k bytes received) cqe->flags 0x1 (CQE_F_BUFFER set, buffer ID 0) and the application now knows that 20k of data is available at 0x1003000, which is where the previous receive ended. CQE_F_BUF_MORE isn't set, as no more data is available in this buffer ID. The next completion is then: cqe->res 0x1000 (4k bytes received) cqe->flags 0x10001 (CQE_F_BUFFER|CQE_F_BUF_MORE set, buffer ID 1) which tells the application that buffer ID 1 is now the current one, hence there's 4k of valid data at 0x2000000. 0x2001000 will be the next receive point for this buffer ID. When a buffer will be reused by future CQE completions, IORING_CQE_BUF_MORE will be set in cqe->flags. This tells the application that the kernel isn't done with the buffer yet, and that it should expect more completions for this buffer ID. Will only be set by provided buffer rings setup with IOU_PBUF_RING INC, as that's the only type of buffer that will see multiple consecutive completions for the same buffer ID. For any other provided buffer type, any completion that passes back a buffer to the application is final. Once a buffer has been fully consumed, the buffer ring head is incremented and the next receive will indicate the next buffer ID in the CQE cflags. On the send side, the application can manage how much data is sent from an existing buffer by setting sqe->len to the desired send length. An application can request incremental consumption by setting IOU_PBUF_RING_INC in the provided buffer ring registration. Outside of that, any provided buffer ring setup and buffer additions is done like before, no changes there. The only change is in how an application may see multiple completions for the same buffer ID, hence needing to know where the next receive will happen. Note that like existing provided buffer rings, this should not be used with IOSQE_ASYNC, as both really require the ring to remain locked over the duration of the buffer selection and the operation completion. It will consume a buffer otherwise regardless of the size of the IO done. 
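As a rough sketch of the application-side bookkeeping this implies: struct recv_buf and consume() are hypothetical, while the flag and shift constants are the uapi ones added below.

#include <stddef.h>
#include <linux/io_uring.h>

struct recv_buf {
	char *base;
	size_t offset;		/* where the next chunk of data will land */
};

void consume(const void *data, size_t len);	/* application-defined */

static void handle_recv_cqe(const struct io_uring_cqe *cqe, struct recv_buf *bufs)
{
	unsigned int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
	struct recv_buf *b = &bufs[bid];

	if (cqe->res <= 0 || !(cqe->flags & IORING_CQE_F_BUFFER))
		return;

	consume(b->base + b->offset, cqe->res);	/* data starts where the last recv ended */

	if (cqe->flags & IORING_CQE_F_BUF_MORE)
		b->offset += cqe->res;		/* kernel keeps filling this buffer */
	else
		b->offset = 0;			/* buffer retired; next CQE names a new buffer ID */
}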
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 042eab793e26..a275f91d2ac0 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -440,11 +440,21 @@ struct io_uring_cqe { * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct * them from sends. + * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get + * more completions. In other words, the buffer is being + * partially consumed, and will be used by the kernel for + * more completions. This is only set for buffers used via + * the incremental buffer consumption, as provided by + * a ring buffer setup with IOU_PBUF_RING_INC. For any + * other provided buffer type, all completions with a + * buffer passed back is automatically returned to the + * application. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) +#define IORING_CQE_F_BUF_MORE (1U << 4) #define IORING_CQE_BUFFER_SHIFT 16 @@ -716,9 +726,17 @@ struct io_uring_buf_ring { * mmap(2) with the offset set as: * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) * to get a virtual mapping for the ring. + * IOU_PBUF_RING_INC: If set, buffers consumed from this buffer ring can be + * consumed incrementally. Normally one (or more) buffers + * are fully consumed. With incremental consumptions, it's + * feasible to register big ranges of buffers, and each + * use of it will consume only as much as it needs. This + * requires that both the kernel and application keep + * track of where the current read/recv index is at. */ enum io_uring_register_pbuf_ring_flags { IOU_PBUF_RING_MMAP = 1, + IOU_PBUF_RING_INC = 2, }; /* argument for IORING_(UN)REGISTER_PBUF_RING */ -- cgit v1.2.3 From 7afea7bc49c5482ac11fc4488230827edc51e28a Mon Sep 17 00:00:00 2001 From: Adrien Destugues Date: Tue, 12 Dec 2023 12:28:17 +0100 Subject: ACPICA: haiku: Fix invalid value used for semaphores ACPICA commit 49fe4f25483feec2f685b204ef19e28d92979e95 In Haiku, semaphores are represented by integers, not pointers. So, we can't use NULL as the invalid/destroyed value, the correct value is -1. Introduce a platform overridable define to allow this. Fixes #162 (which was closed after coming to the conclusion that this should be done, but the change was never done). Link: https://github.com/acpica/acpica/commit/49fe4f25 Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acenv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 337ffa931ee8..3f31df09a9d6 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -252,6 +252,12 @@ #define ACPI_RELEASE_GLOBAL_LOCK(Glptr, pending) pending = 0 #endif +/* NULL/invalid value to use for destroyed or not-yet-created semaphores. */ + +#ifndef ACPI_SEMAPHORE_NULL +#define ACPI_SEMAPHORE_NULL NULL +#endif + /* Flush CPU cache - used when going to sleep. Wbinvd or similar. 
*/ #ifndef ACPI_FLUSH_CPU_CACHE -- cgit v1.2.3 From ff418f34ba44d8ff6cea953dd79546f3e4c6c807 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 20 Mar 2024 13:15:08 +0800 Subject: ACPICA: Complete CXL 3.0 CXIMS structures ACPICA commit eb2a2ff303416fb3f6c425d519dbcd6988dbd91f Commit 2d8dc0383d3c9 ("Add CXL 3.0 structures (CXIMS & RDPAS) to the CEDT table") introduces basic support for CXL XOR Interleave Math Structure (CXIMS). Complete the CXIMS structures. No functional change. Link: https://github.com/acpica/acpica/commit/eb2a2ff3 Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 841ef9f22795..8cfcd1e1c177 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -567,6 +567,10 @@ struct acpi_cedt_cxims { u64 xormap_list[]; }; +struct acpi_cedt_cxims_target_element { + u64 xormap; +}; + /* 3: CXL RCEC Downstream Port Association Structure */ struct acpi_cedt_rdpas { -- cgit v1.2.3 From 7741e3c5f848e867c58c6e612c63a27c1681a493 Mon Sep 17 00:00:00 2001 From: Sia Jee Heng Date: Wed, 28 Feb 2024 22:39:50 -0800 Subject: ACPICA: SPCR: Update the SPCR table to version 4 ACPICA commit 1eeff52124a45d5cd887ba5687bbad0116e4d211 The Microsoft Serial Port Console Redirection (SPCR) specification revision 1.09 comprises additional fields [1]. The newly added fields are: - RISC-V SBI - Precise Baud Rate - namespace_string_length - namespace_string_offset - namespace_string Additionaly, this code will support up to SPCR revision 1.10, as it includes only minor wording changes. Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/serports/serial-port-console-redirection-table # [1] Link: https://github.com/acpica/acpica/commit/1eeff521 Signed-off-by: Sia Jee Heng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl3.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 8f775e3a08fd..5cd755143b7d 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -92,10 +92,10 @@ struct acpi_table_slit { /******************************************************************************* * * SPCR - Serial Port Console Redirection table - * Version 2 + * Version 4 * * Conforms to "Serial Port Console Redirection Table", - * Version 1.03, August 10, 2015 + * Version 1.10, Jan 5, 2023 * ******************************************************************************/ @@ -112,7 +112,7 @@ struct acpi_table_spcr { u8 stop_bits; u8 flow_control; u8 terminal_type; - u8 reserved1; + u8 language; u16 pci_device_id; u16 pci_vendor_id; u8 pci_bus; @@ -120,7 +120,11 @@ struct acpi_table_spcr { u8 pci_function; u32 pci_flags; u8 pci_segment; - u32 reserved2; + u32 uart_clk_freq; + u32 precise_baudrate; + u16 name_space_string_length; + u16 name_space_string_offset; + char name_space_string[]; }; /* Masks for pci_flags field above */ -- cgit v1.2.3 From 4aca2bef90bd12969037468f71568fbef994a254 Mon Sep 17 00:00:00 2001 From: Sia Jee Heng Date: Wed, 28 Feb 2024 22:41:55 -0800 Subject: ACPICA: Headers: Add RISC-V SBI Subtype to DBG2 ACPICA commit 6f4c900bcf9ca065129353f17a83773aa58095aa Include the RISC-V SBI debugging subtype as documented in DBG2 dated April 10, 2023 [1]. 
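A console setup path matching the new subtype might test it along these lines; this is only a sketch, and ACPI_DBG2_SERIAL_PORT is assumed from the existing DBG2 definitions.

#include <linux/acpi.h>

static bool is_riscv_sbi_dbg2(const struct acpi_dbg2_device *dev)
{
	return dev->port_type == ACPI_DBG2_SERIAL_PORT &&
	       dev->port_subtype == ACPI_DBG2_RISCV_SBI_CON;
}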
Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/bringup/acpi-debug-port-table # [1] Link: https://github.com/acpica/acpica/commit/6f4c900b Signed-off-by: Sia Jee Heng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 8cfcd1e1c177..89f0df489dc3 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -755,6 +755,7 @@ struct acpi_dbg2_device { #define ACPI_DBG2_16550_WITH_GAS 0x0012 #define ACPI_DBG2_SDM845_7_372MHZ 0x0013 #define ACPI_DBG2_INTEL_LPSS 0x0014 +#define ACPI_DBG2_RISCV_SBI_CON 0x0015 #define ACPI_DBG2_1394_STANDARD 0x0000 -- cgit v1.2.3 From cf94e10a037c337559bf6c5486cc1abdf4900a08 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Wed, 7 Feb 2024 19:54:20 +0000 Subject: ACPICA: MPAM: Correct the typo in struct acpi_mpam_msc_node member ACPICA commit 3da3f7d776d17e9bfbb15de88317de8d7397ce38 A member of the struct acpi_mpam_msc_node that represents a Memory System Controller node structure - num_resource_nodes has a typo. Fix the typo No functional change. Link: https://github.com/acpica/acpica/commit/3da3f7d7 Signed-off-by: Punit Agrawal Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index e27958ef8264..d3858eebc255 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1607,7 +1607,7 @@ struct acpi_mpam_msc_node { u32 max_nrdy_usec; u64 hardware_id_linked_device; u32 instance_id_linked_device; - u32 num_resouce_nodes; + u32 num_resource_nodes; }; struct acpi_table_mpam { -- cgit v1.2.3 From 632b746b108e3c62e0795072d00ed597371c738a Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Tue, 2 Apr 2024 21:09:45 -0700 Subject: ACPICA: Implement ACPI_WARNING_ONCE and ACPI_ERROR_ONCE ACPICA commit 2ad4e6e7c4118f4cdfcad321c930b836cec77406 In some cases it is not practical nor useful to nag user about some firmware errors that they cannot fix. Add a macro that will print a warning or error only once to be used in these cases. Link: https://github.com/acpica/acpica/commit/2ad4e6e7 Signed-off-by: Vasily Khoruzhick Signed-off-by: Rafael J. Wysocki --- include/acpi/acoutput.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index b1571dd96310..5e0346142f98 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -193,6 +193,7 @@ */ #ifndef ACPI_NO_ERROR_MESSAGES #define AE_INFO _acpi_module_name, __LINE__ +#define ACPI_ONCE(_fn, _plist) { static char _done; if (!_done) { _done = 1; _fn _plist; } } /* * Error reporting. 
Callers module and line number are inserted by AE_INFO, @@ -201,8 +202,10 @@ */ #define ACPI_INFO(plist) acpi_info plist #define ACPI_WARNING(plist) acpi_warning plist +#define ACPI_WARNING_ONCE(plist) ACPI_ONCE(acpi_warning, plist) #define ACPI_EXCEPTION(plist) acpi_exception plist #define ACPI_ERROR(plist) acpi_error plist +#define ACPI_ERROR_ONCE(plist) ACPI_ONCE(acpi_error, plist) #define ACPI_BIOS_WARNING(plist) acpi_bios_warning plist #define ACPI_BIOS_EXCEPTION(plist) acpi_bios_exception plist #define ACPI_BIOS_ERROR(plist) acpi_bios_error plist @@ -214,8 +217,10 @@ #define ACPI_INFO(plist) #define ACPI_WARNING(plist) +#define ACPI_WARNING_ONCE(plist) #define ACPI_EXCEPTION(plist) #define ACPI_ERROR(plist) +#define ACPI_ERROR_ONCE(plist) #define ACPI_BIOS_WARNING(plist) #define ACPI_BIOS_EXCEPTION(plist) #define ACPI_BIOS_ERROR(plist) -- cgit v1.2.3 From f91f2a9879cc77db1f45f690f38f42698580416e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 28 Aug 2024 16:34:00 -0700 Subject: dmaengine: idxd: Add a new DSA device ID for Granite Rapids-D platform A new DSA device ID, 0x11fb, is introduced for the Granite Rapids-D platform. Add the device ID to the IDXD driver. Since a potential security issue has been fixed on the new device, it's secure to assign the device to virtual machines, and therefore, the new device ID will not be added to the VFIO denylist. Additionally, the new device ID may be useful in identifying and addressing any other potential issues with this specific device in the future. The same is also applied to any other new DSA/IAA devices with new device IDs. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240828233401.186007-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..ff99047dac44 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2706,6 +2706,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a +#define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From 4fecf944c051ea852e98c062636bf5b4c7f5f8a7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 28 Aug 2024 16:34:01 -0700 Subject: dmaengine: idxd: Add new DSA and IAA device IDs for Diamond Rapids platform A new DSA device ID, 0x1212, and a new IAA device ID, 0x1216, are introduced for Diamond Rapids platform. Add the device IDs to the IDXD driver. The name "IAA" is used in new code instead of the old name "IAX". However, the "IAX" naming (e.g., IDXD_TYPE_IAX) is retained for legacy code compatibility. 
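For illustration, the new IDs would typically be consumed through the driver's PCI match table along these lines; this is only a sketch, the real idxd table lives under drivers/dma/idxd and carries per-device driver data.

#include <linux/pci.h>

static const struct pci_device_id idxd_ids_sketch[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DSA_GNRD) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DSA_DMR) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IAA_DMR) },
	{ }
};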
Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240828233401.186007-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ff99047dac44..8139231d0e86 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2707,6 +2707,8 @@ #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a #define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb +#define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 +#define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From dd067afe3f8cbe548fb8ac76eaf6e9adfea013b2 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 24 Apr 2024 10:02:31 +0200 Subject: ACPICA: Add support for Windows 11 22H2 _OSI string ACPICA commit f56218c4e4dc1d1f699662d0726ad9e7a0d58548 See: https://github.com/microsoft_docs/windows-driver-docs/commit/be9d1c211adf8fabe5a43de71c034b1b6d7372de Link: https://github.com/acpica/acpica/commit/f56218c4 Signed-off-by: Armin Wolf Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 85c2dcf2b704..80767e8bf3ad 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1311,6 +1311,7 @@ typedef enum { #define ACPI_OSI_WIN_10_19H1 0x14 #define ACPI_OSI_WIN_10_20H1 0x15 #define ACPI_OSI_WIN_11 0x16 +#define ACPI_OSI_WIN_11_22H2 0x17 /* Definitions of getopt */ -- cgit v1.2.3 From 6143f9616627ddbfdf827795745fa4d9db166bc2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 8 Aug 2024 16:18:01 -0700 Subject: ACPICA: HMAT: Add extended linear address mode to MSCIS ACPICA commit aaa08569b81aa4d9ff59f91f00e589e98d499e6c Redefine the 2 reserved bytes at offset 28 of Memory Side Cache Information Structure as "Address Mode" and add defines of the new value. * 0 - Reserved (Unkown Address Mode) * 1 - Extended-linear (N direct-map aliases linearly mapped) * 2..65535 - Reserved (Unknown Address Mode) Link: https://github.com/acpica/acpica/commit/aaa08569 Signed-off-by: Dave Jiang Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 89f0df489dc3..199afc2cd122 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1796,7 +1796,7 @@ struct acpi_hmat_cache { u32 reserved1; u64 cache_size; u32 cache_attributes; - u16 reserved2; + u16 address_mode; u16 number_of_SMBIOShandles; }; @@ -1808,6 +1808,9 @@ struct acpi_hmat_cache { #define ACPI_HMAT_WRITE_POLICY (0x0000F000) #define ACPI_HMAT_CACHE_LINE_SIZE (0xFFFF0000) +#define ACPI_HMAT_CACHE_MODE_UNKNOWN (0) +#define ACPI_HMAT_CACHE_MODE_EXTENDED_LINEAR (1) + /* Values for cache associativity flag */ #define ACPI_HMAT_CA_NONE (0) -- cgit v1.2.3 From 9af32b4a25f59e3a2423a5f87e738a29487242ce Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Tue, 27 Aug 2024 11:24:10 -0700 Subject: ACPICA: Setup for ACPICA release 20240827 ACPICA commit 86c762afe5bf915e8101a0455513368e2a60dd80 Link: https://github.com/acpica/acpica/commit/86c762af Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 99bb45a46600..6c1cd90efae8 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20240322 +#define ACPI_CA_VERSION 0x20240827 #include #include -- cgit v1.2.3 From 6f606ffd6dd7583d8194ee3d858ba4da2eff26a3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 14:08:23 -0700 Subject: bpf: Move insn_buf[16] to bpf_verifier_env This patch moves the 'struct bpf_insn insn_buf[16]' stack usage to the bpf_verifier_env. A '#define INSN_BUF_SIZE 16' is also added to replace the ARRAY_SIZE(insn_buf) usages. Both convert_ctx_accesses() and do_misc_fixup() are changed to use the env->insn_buf. It is a refactoring work for adding the epilogue_buf[16] in a later patch. With this patch, the stack size usage decreased. Before: ./kernel/bpf/verifier.c:22133:5: warning: stack frame size (2584) After: ./kernel/bpf/verifier.c:22184:5: warning: stack frame size (2264) Reviewed-by: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240829210833.388152-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 279b4a640644..0ad2d189c546 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,6 +23,8 @@ * (in the "-8,-16,...,-512" form) */ #define TMP_STR_BUF_LEN 320 +/* Patch buffer size */ +#define INSN_BUF_SIZE 16 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -780,6 +782,7 @@ struct bpf_verifier_env { * e.g., in reg_type_str() to generate reg_type string */ char tmp_str_buf[TMP_STR_BUF_LEN]; + struct bpf_insn insn_buf[INSN_BUF_SIZE]; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 169c31761c8d7f606f3ee628829c27998626c4f0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 14:08:25 -0700 Subject: bpf: Add gen_epilogue to bpf_verifier_ops This patch adds a .gen_epilogue to the bpf_verifier_ops. It is similar to the existing .gen_prologue. Instead of allowing a subsystem to run code at the beginning of a bpf prog, it allows the subsystem to run code just before the bpf prog exit. One of the use case is to allow the upcoming bpf qdisc to ensure that the skb->dev is the same as the qdisc->dev_queue->dev. The bpf qdisc struct_ops implementation could either fix it up or drop the skb. Another use case could be in bpf_tcp_ca.c to enforce snd_cwnd has sane value (e.g. non zero). The epilogue can do the useful thing (like checking skb->dev) if it can access the bpf prog's ctx. Unlike prologue, r1 may not hold the ctx pointer. This patch saves the r1 in the stack if the .gen_epilogue has returned some instructions in the "epilogue_buf". The existing .gen_prologue is done in convert_ctx_accesses(). The new .gen_epilogue is done in the convert_ctx_accesses() also. When it sees the (BPF_JMP | BPF_EXIT) instruction, it will be patched with the earlier generated "epilogue_buf". The epilogue patching is only done for the main prog. Only one epilogue will be patched to the main program. 
When the bpf prog has multiple BPF_EXIT instructions, a BPF_JA is used to goto the earlier patched epilogue. Majority of the archs support (BPF_JMP32 | BPF_JA): x86, arm, s390, risv64, loongarch, powerpc and arc. This patch keeps it simple and always use (BPF_JMP32 | BPF_JA). A new macro BPF_JMP32_A is added to generate the (BPF_JMP32 | BPF_JA) insn. Acked-by: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240829210833.388152-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_verifier.h | 1 + include/linux/filter.h | 10 ++++++++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dc63083f76b7..6f87fb014fba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -974,6 +974,8 @@ struct bpf_verifier_ops { struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0ad2d189c546..2e20207315a9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -783,6 +783,7 @@ struct bpf_verifier_env { */ char tmp_str_buf[TMP_STR_BUF_LEN]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; + struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) diff --git a/include/linux/filter.h b/include/linux/filter.h index b6672ff61407..99b6fc83825b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -437,6 +437,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Relative call */ #define BPF_CALL_REL(TGT) \ -- cgit v1.2.3 From 087adb4f0f91ee330446a70af899e6a996e5cc13 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 6 Aug 2024 19:28:46 +0200 Subject: vfs: dodge smp_mb in break_lease and break_deleg in the common case These inlines show up in the fast path (e.g., in do_dentry_open()) and induce said full barrier regarding i_flctx access when in most cases the pointer is NULL. The pointer can be safely checked before issuing the barrier, dodging it in most cases as a result. It is plausible the consume fence would be sufficient, but I don't want to go audit all callers regarding what they before calling here. 
Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240806172846.886570-1-mjguzik@gmail.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/filelock.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index daee999d05f3..bb44224c6676 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -420,28 +420,38 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) #ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } -- cgit v1.2.3 From 8447d848e1dc6d42d1dcd00f133d7715fc732c47 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 10 Aug 2024 08:47:53 +0200 Subject: vfs: only read fops once in fops_get/put In do_dentry_open() the usage is: f->f_op = fops_get(inode->i_fop); In generated asm the compiler emits 2 reads from inode->i_fop instead of just one. This popped up due to false-sharing where loads from that offset end up bouncing a cacheline during parallel open. While this is going to be fixed, the spurious load does not need to be there. This makes do_dentry_open() go down from 1177 to 1154 bytes. fops_put() is patched to maintain some consistency. No functional changes. Reviewed-by: Josef Bacik Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240810064753.1211441-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c0286d479c9d..696c620f90e0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2553,10 +2553,17 @@ struct super_block *sget(struct file_system_type *type, struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ -#define fops_get(fops) \ - (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) -#define fops_put(fops) \ - do { if (fops) module_put((fops)->owner); } while(0) +#define fops_get(fops) ({ \ + const struct file_operations *_fops = (fops); \ + (((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL)); \ +}) + +#define fops_put(fops) ({ \ + const struct file_operations *_fops = (fops); \ + if (_fops) \ + module_put((_fops)->owner); \ +}) + /* * This one is to be used *ONLY* from ->open() instances. 
* fops must be non-NULL, pinned down *and* module dependencies -- cgit v1.2.3 From 641bb4394f405cba498b100b44541ffc0aed5be1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Aug 2024 12:38:56 +0200 Subject: fs: move FMODE_UNSIGNED_OFFSET to fop_flags This is another flag that is statically set and doesn't need to use up an FMODE_* bit. Move it to ->fop_flags and free up another FMODE_* bit. (1) mem_open() used from proc_mem_operations (2) adi_open() used from adi_fops (3) drm_open_helper(): (3.1) accel_open() used from DRM_ACCEL_FOPS (3.2) drm_open() used from (3.2.1) amdgpu_driver_kms_fops (3.2.2) psb_gem_fops (3.2.3) i915_driver_fops (3.2.4) nouveau_driver_fops (3.2.5) panthor_drm_driver_fops (3.2.6) radeon_driver_kms_fops (3.2.7) tegra_drm_fops (3.2.8) vmwgfx_driver_fops (3.2.9) xe_driver_fops (3.2.10) DRM_GEM_FOPS (3.2.11) DEFINE_DRM_GEM_DMA_FOPS (4) struct memdev sets fmode flags based on type of device opened. For devices using struct mem_fops unsigned offset is used. Mark all these file operations as FOP_UNSIGNED_OFFSET and add asserts into the open helper to ensure that the flag is always set. Link: https://lore.kernel.org/r/20240809-work-fop_unsigned-v1-1-658e054d893e@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/drm/drm_accel.h | 3 ++- include/drm/drm_gem.h | 3 ++- include/drm/drm_gem_dma_helper.h | 1 + include/linux/fs.h | 5 +++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index f4d3784b1dce..41c78b7d712c 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -28,7 +28,8 @@ .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek, \ - .mmap = drm_gem_mmap + .mmap = drm_gem_mmap, \ + .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_ACCEL_FOPS() - macro to generate file operations for accelerators drivers diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index bae4865b2101..d8b86df2ec0d 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -447,7 +447,8 @@ struct drm_gem_object { .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek,\ - .mmap = drm_gem_mmap + .mmap = drm_gem_mmap, \ + .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers diff --git a/include/drm/drm_gem_dma_helper.h b/include/drm/drm_gem_dma_helper.h index a827bde494f6..f2678e7ecb98 100644 --- a/include/drm/drm_gem_dma_helper.h +++ b/include/drm/drm_gem_dma_helper.h @@ -267,6 +267,7 @@ unsigned long drm_gem_dma_get_unmapped_area(struct file *filp, .read = drm_read,\ .llseek = noop_llseek,\ .mmap = drm_gem_mmap,\ + .fop_flags = FOP_UNSIGNED_OFFSET, \ DRM_GEM_DMA_UNMAPPED_AREA_FOPS \ } diff --git a/include/linux/fs.h b/include/linux/fs.h index 696c620f90e0..0145bda465ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -146,8 +146,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* File is huge (eg. 
/dev/mem): treat loff_t as unsigned */ -#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) +/* FMODE_* bit 13 */ /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) @@ -2073,6 +2072,8 @@ struct file_operations { #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) +/* Treat loff_t as unsigned (e.g., /dev/mem) */ +#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From 433f9d76a01056dfeaefc15167b11e514e56f956 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 14 Aug 2024 17:02:31 +0800 Subject: autofs: add per dentry expire timeout Add ability to set per-dentry mount expire timeout to autofs. There are two fairly well known automounter map formats, the autofs format and the amd format (more or less System V and Berkley). Some time ago Linux autofs added an amd map format parser that implemented a fair amount of the amd functionality. This was done within the autofs infrastructure and some functionality wasn't implemented because it either didn't make sense or required extra kernel changes. The idea was to restrict changes to be within the existing autofs functionality as much as possible and leave changes with a wider scope to be considered later. One of these changes is implementing the amd options: 1) "unmount", expire this mount according to a timeout (same as the current autofs default). 2) "nounmount", don't expire this mount (same as setting the autofs timeout to 0 except only for this specific mount) . 3) "utimeout=", expire this mount using the specified timeout (again same as setting the autofs timeout but only for this mount). To implement these options per-dentry expire timeouts need to be implemented for autofs indirect mounts. This is because all map keys (mounts) for autofs indirect mounts use an expire timeout stored in the autofs mount super block info. structure and all indirect mounts use the same expire timeout. Now I have a request to add the "nounmount" option so I need to add the per-dentry expire handling to the kernel implementation to do this. The implementation uses the trailing path component to identify the mount (and is also used as the autofs map key) which is passed in the autofs_dev_ioctl structure path field. The expire timeout is passed in autofs_dev_ioctl timeout field (well, of the timeout union). If the passed in timeout is equal to -1 the per-dentry timeout and flag are cleared providing for the "unmount" option. If the timeout is greater than or equal to 0 the timeout is set to the value and the flag is also set. If the dentry timeout is 0 the dentry will not expire by timeout which enables the implementation of the "nounmount" option for the specific mount. When the dentry timeout is greater than zero it allows for the implementation of the "utimeout=" option. 
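A userspace sketch of setting (or clearing) the per-mount timeout through the autofs device ioctl, following the field usage described above; the command name and init helper are assumed from the uapi auto_dev-ioctl.h header, so treat the details as assumptions.

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

/* devfd is an open fd on /dev/autofs, ioctlfd identifies the indirect mount,
 * key is the trailing path component (the map key).  Timeout semantics as
 * described above: -1 clears, 0 never expires, > 0 sets the expire timeout. */
static int set_key_timeout(int devfd, int ioctlfd, const char *key, long long timeout)
{
	size_t sz = sizeof(struct autofs_dev_ioctl) + strlen(key) + 1;
	struct autofs_dev_ioctl *param = calloc(1, sz);
	int ret;

	if (!param)
		return -1;
	init_autofs_dev_ioctl(param);
	param->size = sz;
	param->ioctlfd = ioctlfd;
	param->timeout.timeout = timeout;
	strcpy(param->path, key);
	ret = ioctl(devfd, AUTOFS_DEV_IOCTL_TIMEOUT, param);
	free(param);
	return ret;
}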
Signed-off-by: Ian Kent Link: https://lore.kernel.org/r/20240814090231.963520-1-raven@themaw.net Signed-off-by: Christian Brauner --- include/uapi/linux/auto_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 1f7925afad2d..8081df849743 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 5 +#define AUTOFS_PROTO_SUBVERSION 6 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed -- cgit v1.2.3 From 1c48d441468c425e878d81c5c3e7ff8a9a594cc0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 16 Aug 2024 16:35:52 +0200 Subject: inode: remove __I_DIO_WAKEUP Afaict, we can just rely on inode->i_dio_count for waiting instead of this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio and xfstests dio tests. Link: https://lore.kernel.org/r/20240816-vfs-misc-dio-v1-1-80fe21a2c710@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0145bda465ff..8f8d274929d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2373,8 +2373,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_REFERENCED Marks the inode as recently references on the LRU list. * - * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). - * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See @@ -2409,8 +2407,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) -#define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 13) @@ -3227,7 +3223,9 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, } #endif +bool inode_dio_finished(const struct inode *inode); void inode_dio_wait(struct inode *inode); +void inode_dio_wait_interruptible(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O requests @@ -3251,7 +3249,7 @@ static inline void inode_dio_begin(struct inode *inode) static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); + wake_up_var(&inode->i_dio_count); } extern void inode_set_flags(struct inode *inode, unsigned int flags, -- cgit v1.2.3 From 029c3f27fe84c5fd29d49a99cc5176433bf12209 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 11:28:38 +0200 Subject: fs: remove unused path_put_init() This helper has been unused for a while now. 
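To make the __I_DIO_WAKEUP removal above concrete: with the flag gone, waiters key on the value of i_dio_count itself and pair with the wake_up_var() call left in inode_dio_end(). A minimal sketch of the waiter side, matching the inode_dio_finished()/inode_dio_wait() declarations in that hunk but not claiming to be the exact fs/inode.c code:

#include <linux/fs.h>
#include <linux/wait_bit.h>     /* wait_var_event() */

static bool example_inode_dio_finished(const struct inode *inode)
{
        return atomic_read(&inode->i_dio_count) == 0;
}

static void example_inode_dio_wait(struct inode *inode)
{
        /* Sleep until inode_dio_end() drops i_dio_count to zero and wakes
         * the variable's address via wake_up_var(). */
        wait_var_event(&inode->i_dio_count,
                       example_inode_dio_finished(inode));
}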
Link: https://lore.kernel.org/r/20240822-bewuchs-werktag-46672b3c0606@brauner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/path.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/path.h b/include/linux/path.h index ca073e70decd..7ea389dc764b 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -18,12 +18,6 @@ static inline int path_equal(const struct path *path1, const struct path *path2) return path1->mnt == path2->mnt && path1->dentry == path2->dentry; } -static inline void path_put_init(struct path *path) -{ - path_put(path); - *path = (struct path) { }; -} - /* * Cleanup macro for use with __free(path_put). Avoids dereference and * copying @path unlike DEFINE_FREE(). path_put() will handle the empty -- cgit v1.2.3 From 71ff58ce3428b2471efae5b00594e892b285c97c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 22 Aug 2024 11:30:58 +0200 Subject: fs: s/__u32/u32/ for s_fsnotify_mask The underscore variants are for uapi whereas the non-underscore variants are for in-kernel consumers. Link: https://lore.kernel.org/r/20240822-anwerben-nutzung-1cd6c82a565f@brauner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f8d274929d7..cda1f2545ea0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1266,7 +1266,7 @@ struct super_block { time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY - __u32 s_fsnotify_mask; + u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; #endif -- cgit v1.2.3 From da18ecbf0fb6fb73e6f9273e7aa15610f148ce17 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:35 +0200 Subject: fs: add i_state helpers The i_state member is an unsigned long so that it can be used with the wait bit infrastructure which expects unsigned long. This wastes 4 bytes which we're unlikely to ever use. Switch to using the var event wait mechanism using the address of the bit. Thanks to Linus for the address idea. Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cda1f2545ea0..d35de348d2ec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,6 +744,21 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +/* + * Get bit address from inode->i_state to use with wait_var_event() + * infrastructre. + */ +#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit)) + +struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, + struct inode *inode, u32 bit); + +static inline void inode_wake_up_bit(struct inode *inode, u32 bit) +{ + /* Caller is responsible for correct memory barriers. */ + wake_up_var(inode_state_wait_address(inode, bit)); +} + struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); static inline unsigned int i_blocksize(const struct inode *node) -- cgit v1.2.3 From 2ed634c96ed1d6e4ee0497be176f9980866e81cb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:36 +0200 Subject: fs: reorder i_state bits so that we can use the first bits to derive unique addresses from i_state. 
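The i_state helpers above are meant to be used in pairs: wait on the address derived from a bit, wake that same address after updating i_state. A sketch of the pattern for I_SYNC (illustrative only; memory ordering is deliberately left to the caller, as the comment in the hunk says, and real users such as writeback handle it under inode->i_lock):

#include <linux/fs.h>
#include <linux/wait_bit.h>

/* Waiter: sleep until I_SYNC clears, keyed on the per-bit address. */
static void example_wait_for_sync(struct inode *inode)
{
        wait_var_event(inode_state_wait_address(inode, __I_SYNC),
                       !(READ_ONCE(inode->i_state) & I_SYNC));
}

/* Waker: clear the bit, then wake the same derived address. */
static void example_finish_sync(struct inode *inode)
{
        spin_lock(&inode->i_lock);
        inode->i_state &= ~I_SYNC;
        inode_wake_up_bit(inode, __I_SYNC);     /* barriers are the caller's job */
        spin_unlock(&inode->i_lock);
}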
Link: https://lore.kernel.org/r/20240823-work-i_state-v3-2-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jeff Layton Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d35de348d2ec..a868c9823c10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2410,28 +2410,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * i_count. * * Q: What is the difference between I_WILL_FREE and I_FREEING? + * + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait + * upon. There's one free address left. */ -#define I_DIRTY_SYNC (1 << 0) -#define I_DIRTY_DATASYNC (1 << 1) -#define I_DIRTY_PAGES (1 << 2) -#define __I_NEW 3 +#define __I_NEW 0 #define I_NEW (1 << __I_NEW) -#define I_WILL_FREE (1 << 4) -#define I_FREEING (1 << 5) -#define I_CLEAR (1 << 6) -#define __I_SYNC 7 +#define __I_SYNC 1 #define I_SYNC (1 << __I_SYNC) -#define I_REFERENCED (1 << 8) +#define __I_LRU_ISOLATING 2 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) + +#define I_DIRTY_SYNC (1 << 3) +#define I_DIRTY_DATASYNC (1 << 4) +#define I_DIRTY_PAGES (1 << 5) +#define I_WILL_FREE (1 << 6) +#define I_FREEING (1 << 7) +#define I_CLEAR (1 << 8) +#define I_REFERENCED (1 << 9) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_WB_SWITCH (1 << 13) -#define I_OVL_INUSE (1 << 14) -#define I_CREATING (1 << 15) -#define I_DONTCACHE (1 << 16) -#define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_NETFS_WB (1 << 18) -#define __I_LRU_ISOLATING 19 -#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) +#define I_WB_SWITCH (1 << 12) +#define I_OVL_INUSE (1 << 13) +#define I_CREATING (1 << 14) +#define I_DONTCACHE (1 << 15) +#define I_SYNC_QUEUED (1 << 16) +#define I_PINNING_NETFS_WB (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From 0fe340a98b584a31b07c664dc5ff0c923730581e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:38 +0200 Subject: inode: port __I_NEW to var event Port the __I_NEW mechanism to use the new var event mechanism. Link: https://lore.kernel.org/r/20240823-work-i_state-v3-4-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 56b85841ae4c..8f651bb0a1a5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { - wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); + wait_var_event(inode_state_wait_address(inode, __I_NEW), + !(READ_ONCE(inode->i_state) & I_NEW)); } #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From 2b111edbe0a9c441605be5cfb73001dc98ec686f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 Aug 2024 14:47:40 +0200 Subject: inode: make i_state a u32 Now that we use the wait var event mechanism make i_state a u32 and free up 4 bytes. This means we currently have two 4 byte holes in struct inode which we can pack. 
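Worth spelling out, although it is only implied by the series: inode_state_wait_address() is (char *)&inode->i_state + bit, so with a 4-byte i_state only bit numbers 0..3 yield wait addresses that stay inside the field, which is why the earlier reordering moved __I_NEW, __I_SYNC and __I_LRU_ISOLATING to the front and notes that one free address remains. A compile-time guard along these lines (not something the series adds) would document the constraint:

#include <linux/build_bug.h>
#include <linux/fs.h>
#include <linux/stddef.h>

static inline void example_check_wait_bits(void)
{
        /* Bits used as wait addresses must lie within the (now u32) i_state. */
        BUILD_BUG_ON(__I_LRU_ISOLATING >= sizeof_field(struct inode, i_state));
}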
Link: https://lore.kernel.org/r/20240823-work-i_state-v3-6-5cd5fd207a57@kernel.org Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a868c9823c10..a1ef8c65d828 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -681,7 +681,8 @@ struct inode { #endif /* Misc */ - unsigned long i_state; + u32 i_state; + /* 32-bit hole */ struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ -- cgit v1.2.3 From 459ca85ae1feff78d1518344df88bb79a092780c Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Wed, 28 Aug 2024 16:13:59 +0800 Subject: writeback: Refine the show_inode_state() macro definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the show_inode_state() macro only prints part of the state of inode->i_state. Let’s improve it to display more of its state. Signed-off-by: Julian Sun Link: https://lore.kernel.org/r/20240828081359.62429-1-sunjunchao2870@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/writeback.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 54e353c9f919..a261e86e61fa 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -20,7 +20,15 @@ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ - {I_REFERENCED, "I_REFERENCED"} \ + {I_REFERENCED, "I_REFERENCED"}, \ + {I_LINKABLE, "I_LINKABLE"}, \ + {I_WB_SWITCH, "I_WB_SWITCH"}, \ + {I_OVL_INUSE, "I_OVL_INUSE"}, \ + {I_CREATING, "I_CREATING"}, \ + {I_DONTCACHE, "I_DONTCACHE"}, \ + {I_SYNC_QUEUED, "I_SYNC_QUEUED"}, \ + {I_PINNING_NETFS_WB, "I_PINNING_NETFS_WB"}, \ + {I_LRU_ISOLATING, "I_LRU_ISOLATING"} \ ) /* enums need to be exported to user space */ -- cgit v1.2.3 From 2a9f8995f7139a759fe8cb31d9e8a433757228ec Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 4 Jul 2024 19:23:18 +0200 Subject: mfd: 88pm80x: Constify read-only regmap structs `pm800_irq`, `pm805_irq` and `pm805_irq_chip` are not modified and can be declared as const to move their data to a read-only section. In order to keep the const modifier for the regmap_irq_chip structures, the pointer used to reference them must be converted to const as well. Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240704-mfd-const-regmap_config-v2-8-0c8785b1331d@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/88pm80x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index def5df6e74bf..551ef1c367d6 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -294,7 +294,7 @@ struct pm80x_chip { struct i2c_client *client; struct i2c_client *companion; struct regmap *regmap; - struct regmap_irq_chip *regmap_irq_chip; + const struct regmap_irq_chip *regmap_irq_chip; struct regmap_irq_chip_data *irq_data; int type; int irq; -- cgit v1.2.3 From 87b9c9ea01f4fd9193f30cc19bd03b05541ff4cb Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Mon, 15 Jul 2024 08:17:28 +0000 Subject: mfd: ds1wm: Remove remaining header file The driver was removed after kernel 6.2 rendering the header file unused. 
Signed-off-by: Wilken Gottwalt Link: https://lore.kernel.org/r/ZpTbGHb6EX2Oe7ok@monster.localdomain Signed-off-by: Lee Jones --- include/linux/mfd/ds1wm.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/mfd/ds1wm.h (limited to 'include') diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h deleted file mode 100644 index 43dfca1c9702..000000000000 --- a/include/linux/mfd/ds1wm.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* MFD cell driver data for the DS1WM driver - * - * to be defined in the MFD device that is - * using this driver for one of his sub devices - */ - -struct ds1wm_driver_data { - int active_high; - int clock_rate; - /* in milliseconds, the amount of time to - * sleep following a reset pulse. Zero - * should work if your bus devices recover - * time respects the 1-wire spec since the - * ds1wm implements the precise timings of - * a reset pulse/presence detect sequence. - */ - unsigned int reset_recover_delay; - - /* Say 1 here for big endian Hardware - * (only relevant with bus-shift > 0 - */ - bool is_hw_big_endian; - - /* left shift of register number to get register address offsett. - * Only 0,1,2 allowed for 8,16 or 32 bit bus width respectively - */ - unsigned int bus_shift; -}; -- cgit v1.2.3 From 5e9629d0ae977d6f6916d7e519724804e95f0b07 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 27 Aug 2024 15:51:12 +0100 Subject: drivers/perf: arm_spe: Use perf_allow_kernel() for permissions Use perf_allow_kernel() for 'pa_enable' (physical addresses), 'pct_enable' (physical timestamps) and context IDs. This means that perf_event_paranoid is now taken into account and LSM hooks can be used, which is more consistent with other perf_event_open calls. For example PERF_SAMPLE_PHYS_ADDR uses perf_allow_kernel() rather than just perfmon_capable(). This also indirectly fixes the following error message which is misleading because perf_event_paranoid is not taken into account by perfmon_capable(): $ perf record -e arm_spe/pa_enable/ Error: Access to performance monitoring and observability operations is limited. Consider adjusting /proc/sys/kernel/perf_event_paranoid setting ... Suggested-by: Al Grant Signed-off-by: James Clark Link: https://lore.kernel.org/r/20240827145113.1224604-1-james.clark@linaro.org Link: https://lore.kernel.org/all/20240807120039.GD37996@noisy.programming.kicks-ass.net/ Signed-off-by: Will Deacon --- include/linux/perf_event.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a8942277dda..e336306b8c08 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1602,13 +1602,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -static inline int perf_allow_kernel(struct perf_event_attr *attr) -{ - if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) - return -EACCES; - - return security_perf_event_open(attr, PERF_SECURITY_KERNEL); -} +int perf_allow_kernel(struct perf_event_attr *attr); static inline int perf_allow_cpu(struct perf_event_attr *attr) { -- cgit v1.2.3 From 6c17c7d5936e6af6a5bda9f9de98a5e2ee6e8a6f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Aug 2024 15:19:20 -0300 Subject: iommu: Allow ATS to work on VFs when the PF uses IDENTITY PCI ATS has a global Smallest Translation Unit field that is located in the PF but shared by all of the VFs. 
The expectation is that the STU will be set to the root port's global STU capability which is driven by the IO page table configuration of the iommu HW. Today it becomes set when the iommu driver first enables ATS. Thus, to enable ATS on the VF, the PF must have already had the correct STU programmed, even if ATS is off on the PF. Unfortunately the PF only programs the STU when the PF enables ATS. The iommu drivers tend to leave ATS disabled when IDENTITY translation is being used. Thus we can get into a state where the PF is setup to use IDENTITY with the DMA API while the VF would like to use VFIO with a PAGING domain and have ATS turned on. This fails because the PF never loaded a PAGING domain and so it never setup the STU, and the VF can't do it. The simplest solution is to have the iommu driver set the ATS STU when it probes the device. This way the ATS STU is loaded immediately at boot time to all PFs and there is no issue when a VF comes to use it. Add a new call pci_prepare_ats() which should be called by iommu drivers in their probe_device() op for every PCI device if the iommu driver supports ATS. This will setup the STU based on whatever page size capability the iommu HW has. Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-0fb4d2ab6770+7e706-ats_vf_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/pci-ats.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index df54cd5b15db..0e8b74e63767 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -8,6 +8,7 @@ /* Address Translation Service */ bool pci_ats_supported(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); +int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); @@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline int pci_prepare_ats(struct pci_dev *dev, int ps) +{ return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } -- cgit v1.2.3 From 5c40e050e6ac0218af7c520095729d440cc87e6b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 29 Aug 2024 15:06:40 +0200 Subject: fs: drop GFP_NOFAIL mode from alloc_page_buffers There is only one called of alloc_page_buffers and it doesn't require __GFP_NOFAIL so drop this allocation mode. 
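Back to the pci_prepare_ats() addition above: the intended caller is an IOMMU driver's probe_device() path, so the PF's STU is programmed at boot regardless of whether ATS is ever enabled on the PF itself. A driver-agnostic sketch; using PAGE_SHIFT as the IOMMU's smallest page shift is an assumption:

#include <linux/pci.h>
#include <linux/pci-ats.h>

/* Called from the driver's probe_device() op for every PCI device. */
static void example_prepare_ats(struct device *dev)
{
        if (!dev_is_pci(dev))
                return;

        /* 'ps' is the log2 of the smallest IO page size the IOMMU supports;
         * PAGE_SHIFT is only a placeholder for that value here. */
        pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
}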
Signed-off-by: Michal Hocko Link: https://lore.kernel.org/r/20240829130640.1397970-1-mhocko@kernel.org Acked-by: Song Liu Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/buffer_head.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 14acf1bbe0ce..7e903457967a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -199,8 +199,7 @@ void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, gfp_t gfp); -struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, - bool retry); +struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size); struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); -- cgit v1.2.3 From 1a5caec7f80ca2e659c03f45378ee26915f4eda2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 30 Aug 2024 07:35:12 -0700 Subject: regulator: core: Stub devm_regulator_bulk_get_const() if !CONFIG_REGULATOR When adding devm_regulator_bulk_get_const() I missed adding a stub for when CONFIG_REGULATOR is not enabled. Under certain conditions (like randconfig testing) this can cause the compiler to reports errors like: error: implicit declaration of function 'devm_regulator_bulk_get_const'; did you mean 'devm_regulator_bulk_get_enable'? Add the stub. Fixes: 1de452a0edda ("regulator: core: Allow drivers to define their init data as const") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408301813.TesFuSbh-lkp@intel.com/ Cc: Neil Armstrong Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20240830073511.1.Ib733229a8a19fad8179213c05e1af01b51e42328@changeid Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d986ec13092e..b9ce521910a0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -452,6 +452,14 @@ static inline int of_regulator_bulk_get_all(struct device *dev, struct device_no return 0; } +static inline int devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { -- cgit v1.2.3 From a06c3fad49a50d5d5eb078f93e70f4d3eca5d5a5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:45 +0100 Subject: drivers/virt: pkvm: Add initial support for running as a protected guest Implement a pKVM protected guest driver to probe the presence of pKVM and determine the memory protection granule using the HYP_MEMINFO hypercall. 
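For orientation, probing the granule from the guest would look roughly like the sketch below. The function ID is the one added here; passing zero arguments and reading the granule size in bytes from a0 are assumptions, and a real caller would first check the bitmap advertised by the KVM vendor-hyp FEATURES call.

#include <linux/arm-smccc.h>

static unsigned long example_pkvm_protection_granule(void)
{
        struct arm_smccc_res res;

        arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID,
                             0, 0, 0, &res);
        if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
                return 0;

        return res.a0;  /* assumed: protection granule in bytes */
}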
Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-3-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 083f85653716..16b6dcc54e02 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -115,6 +115,7 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +#define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -137,6 +138,12 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_PTP) +#define ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From ebc59b120c588156feb7ce194a9636584ced18ba Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:47 +0100 Subject: drivers/virt: pkvm: Hook up mem_encrypt API using pKVM hypercalls If we detect the presence of pKVM's SHARE and UNSHARE hypercalls, then register a backend implementation of the mem_encrypt API so that things like DMA buffers can be shared appropriately with the host. Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-5-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 16b6dcc54e02..9cb7c95920b0 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -116,6 +116,8 @@ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 +#define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 +#define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -144,6 +146,18 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_SHARE) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From 0f12694958001c96bda811473fdb23f333c6d3ca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:49 +0100 Subject: drivers/virt: pkvm: Intercept ioremap using pKVM MMIO_GUARD hypercall Hook up pKVM's MMIO_GUARD hypercall so that ioremap() and friends will register the target physical address as MMIO with the hypervisor, allowing guest exits to that page to be emulated by the host with full syndrome information. 
Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-7-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 9cb7c95920b0..e93c1f7cea70 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -118,6 +118,7 @@ #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 #define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD 7 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -158,6 +159,12 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 -- cgit v1.2.3 From 21be9f7110d4c044c2b49bafbd7246335f236221 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2024 14:01:50 +0100 Subject: arm64: smccc: Reserve block of KVM "vendor" services for pKVM hypercalls pKVM relies on hypercalls to expose services such as memory sharing to protected guests. Tentatively allocate a block of 58 hypercalls (i.e. fill the remaining space in the first 64 function IDs) for pKVM usage, as future extensions such as pvIOMMU support, range-based memory sharing and validation of assigned devices will require additional services. Suggested-by: Marc Zyngier Link: https://lore.kernel.org/r/86a5h5yg5y.wl-maz@kernel.org Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240830130150.8568-8-will@kernel.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index e93c1f7cea70..f59099a213d0 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -115,10 +115,70 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +/* Start of pKVM hypercall range */ #define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 #define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 #define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_5 5 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_6 6 #define ARM_SMCCC_KVM_FUNC_MMIO_GUARD 7 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_8 8 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_9 9 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_10 10 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_11 11 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_12 12 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_13 13 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_14 14 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_15 15 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_16 16 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_17 17 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_18 18 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_19 19 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_20 20 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_21 21 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_22 22 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_23 23 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_24 24 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_25 25 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_26 26 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_27 27 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_28 28 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_29 29 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_30 30 +#define 
ARM_SMCCC_KVM_FUNC_PKVM_RESV_31 31 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_32 32 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_33 33 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_34 34 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_35 35 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_36 36 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_37 37 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_38 38 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_39 39 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_40 40 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_41 41 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_42 42 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_43 43 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_44 44 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_45 45 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_46 46 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_47 47 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_48 48 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_49 49 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_50 50 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_51 51 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_52 52 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_53 53 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_54 54 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_55 55 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_56 56 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_57 57 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_58 58 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_59 59 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_60 60 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_61 61 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63 +/* End of pKVM hypercall range */ #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 -- cgit v1.2.3 From 929c81ade6355b87097a2a4886c10750e68626cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 30 Aug 2024 11:45:57 +0200 Subject: fbdev: Introduce devm_register_framebuffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a device-managed variant of register_framebuffer() which automatically unregisters the framebuffer on device destruction. This can simplify the error handling and resource management in drivers. Signed-off-by: Thomas Weißschuh Signed-off-by: Helge Deller --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..abf6643ebcaf 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -601,6 +601,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); +extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); -- cgit v1.2.3 From d7eafed3223af19add14b67a390ec2b983d890e0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 7 Aug 2024 14:04:58 +0100 Subject: drm/msm: Expose expanded UBWC config uapi This adds extra parameters that affect UBWC tiling that will be used by the Mesa implementation of VK_EXT_host_image_copy. 
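From userspace (for example Mesa) the new values are read through the existing DRM_MSM_GET_PARAM path; a rough libdrm sketch, with error handling trimmed and the uapi include path depending on the build environment:

#include <stdio.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>        /* uapi header; exact include path may vary */

static void example_query_ubwc(int fd)
{
        struct drm_msm_param req = {
                .pipe  = MSM_PIPE_3D0,
                .param = MSM_PARAM_UBWC_SWIZZLE,
        };

        if (drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req)) == 0)
                printf("UBWC swizzle: 0x%llx\n", (unsigned long long)req.value);
}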
Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/607401/ Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 3fca72f73861..2377147b6af0 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -88,6 +88,8 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ #define MSM_PARAM_RAYTRACING 0x11 /* RO */ +#define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ +#define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 9aee8262445d185960431e972e2d997e6aba3de0 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:58:23 +0800 Subject: ARM: OMAP2+: Remove obsoleted declaration for gpmc_onenand_init The gpmc_onenand_init() have been removed since commit 2514830b8b8c ("ARM: OMAP2+: Remove gpmc-onenand"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826035823.4043171-1-cuigaosheng1@huawei.com Signed-off-by: Kevin Hilman --- include/linux/omap-gpmc.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 082841908fe7..c9e3843d2dd5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -84,13 +84,3 @@ extern void gpmc_read_settings_dt(struct device_node *np, struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; - -#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); -#else -#define board_onenand_data NULL -static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) -{ - return 0; -} -#endif -- cgit v1.2.3 From 8c2bd38b95f75f3d2a08c93e35303e26d480d24e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:39 +0000 Subject: icmp: change the order of rate limits ICMP messages are ratelimited : After the blamed commits, the two rate limiters are applied in this order: 1) host wide ratelimit (icmp_global_allow()) 2) Per destination ratelimit (inetpeer based) In order to avoid side-channels attacks, we need to apply the per destination check first. This patch makes the following change : 1) icmp_global_allow() checks if the host wide limit is reached. But credits are not yet consumed. This is deferred to 3) 2) The per destination limit is checked/updated. This might add a new node in inetpeer tree. 3) icmp_global_consume() consumes tokens if prior operations succeeded. This means that host wide ratelimit is still effective in keeping inetpeer tree small even under DDOS. As a bonus, I removed icmp_global.lock as the fast path can use a lock-free operation. 
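The new ordering is easiest to see as code. A simplified sketch of the allow/consume pairing after this patch (the inetpeer lookup and the per-netns conversion from the follow-up patches are omitted):

#include <net/ip.h>
#include <net/inetpeer.h>

static bool example_icmp_allow(struct inet_peer *peer, int ratelimit)
{
        /* 1) host-wide limit: test only, no credits consumed yet */
        if (!icmp_global_allow())
                return false;

        /* 2) per-destination (inetpeer based) limit */
        if (!inet_peer_xrlim_allow(peer, ratelimit))
                return false;

        /* 3) both passed: now consume the host-wide credits */
        icmp_global_consume();
        return true;
}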
Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited") Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Reported-by: Keyu Man Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Jesper Dangaard Brouer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index c5606cadb1a5..82248813619e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -795,6 +795,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } bool icmp_global_allow(void); +void icmp_global_consume(void); + extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; -- cgit v1.2.3 From b056b4cd9178f7a1d5d57f7b48b073c29729ddaa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:40 +0000 Subject: icmp: move icmp_global.credit and icmp_global.stamp to per netns storage Host wide ICMP ratelimiter should be per netns, to provide better isolation. Following patch in this series makes the sysctl per netns. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20240829144641.3880376-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 4 ++-- include/net/netns/ipv4.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 82248813619e..d3bca4e83979 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -794,8 +794,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } -bool icmp_global_allow(void); -void icmp_global_consume(void); +bool icmp_global_allow(struct net *net); +void icmp_global_consume(struct net *net); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 5fcd61ada622..54fe7c079fff 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,7 +122,8 @@ struct netns_ipv4 { u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; - + atomic_t icmp_global_credit; + u32 icmp_global_stamp; u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; -- cgit v1.2.3 From f17bf505ff89595df5147755e51441632a5dc563 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:41 +0000 Subject: icmp: icmp_msgs_per_sec and icmp_msgs_burst sysctls become per netns Previous patch made ICMP rate limits per netns, it makes sense to allow each netns to change the associated sysctl. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20240829144641.3880376-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 3 --- include/net/netns/ipv4.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d3bca4e83979..1ee472fa8b37 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -797,9 +797,6 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) bool icmp_global_allow(struct net *net); void icmp_global_consume(struct net *net); -extern int sysctl_icmp_msgs_per_sec; -extern int sysctl_icmp_msgs_burst; - #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 54fe7c079fff..276f622f3516 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,8 @@ struct netns_ipv4 { u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + int sysctl_icmp_msgs_per_sec; + int sysctl_icmp_msgs_burst; atomic_t icmp_global_credit; u32 icmp_global_stamp; u32 ip_rt_min_pmtu; -- cgit v1.2.3 From ddc94d0b17e8ea8179ecbbefacac3fba0fb77265 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Aug 2024 10:01:43 -0700 Subject: dma-buf: Split out dma fence array create into alloc and arm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful to preallocate dma fence array and then arm in path of reclaim or a dma fence. v2: - s/arm/init (Christian) - Drop !array warn (Christian) v3: - Fix kernel doc typos (dim) Cc: Sumit Semwal Cc: Christian König Signed-off-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240826170144.2492062-2-matthew.brost@intel.com --- include/linux/dma-fence-array.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 29c5650c1038..079b3dec0a16 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -79,6 +79,12 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) +struct dma_fence_array *dma_fence_array_alloc(int num_fences); +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, -- cgit v1.2.3 From 1abab1ba0775036bb67c6c57945c637be644c04f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 30 Aug 2024 10:02:28 +0000 Subject: cgroup/cpuset: guard cpuset-v1 code under CONFIG_CPUSETS_V1 This patch introduces CONFIG_CPUSETS_V1 and guard cpuset-v1 code under CONFIG_CPUSETS_V1. The default value of CONFIG_CPUSETS_V1 is N, so that user who adopted v2 don't have 'pay' for cpuset v1. 
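To illustrate the dma-fence split above (a sketch, not taken from the patch): the allocation can be done early, in a context that is still allowed to allocate, and the array armed later, for instance on the signalling or reclaim path, without any further allocation. Lifetime of the fences array follows the usual dma_fence_array rules.

#include <linux/dma-fence.h>
#include <linux/dma-fence-array.h>

static struct dma_fence_array *example_build_array(struct dma_fence **fences,
                                                   int num_fences)
{
        struct dma_fence_array *array;

        /* May allocate: do this before entering the reclaim/fence path. */
        array = dma_fence_array_alloc(num_fences);
        if (!array)
                return NULL;

        /* Later, from the critical path: initialization only, no allocation. */
        dma_fence_array_init(array, num_fences, fences,
                             dma_fence_context_alloc(1), 1, false);

        return array;
}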
Signed-off-by: Chen Ridong Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2a6981eeebf8..835e7b793f6a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -99,6 +99,7 @@ static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2); +#ifdef CONFIG_CPUSETS_V1 #define cpuset_memory_pressure_bump() \ do { \ if (cpuset_memory_pressure_enabled) \ @@ -106,6 +107,9 @@ extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, } while (0) extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); +#else +static inline void cpuset_memory_pressure_bump(void) { } +#endif extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); -- cgit v1.2.3 From 7eba264a3c102b6c006aa429f7eb25d2b8533da7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 29 Aug 2024 17:10:49 +0100 Subject: mac802154: Correct spelling in mac802154.h Correct spelling in mac802154.h. As reported by codespell. Signed-off-by: Simon Horman Message-ID: <20240829-wpan-spell-v1-1-799d840e02c4@kernel.org> Signed-off-by: Stefan Schmidt --- include/net/mac802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 4a3a9de9da73..1b5488fa2ff0 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -140,7 +140,7 @@ enum ieee802154_hw_flags { * * xmit_sync: * Handler that 802.15.4 module calls for each transmitted frame. - * skb cntains the buffer starting from the IEEE 802.15.4 header. + * skb contains the buffer starting from the IEEE 802.15.4 header. * The low-level driver should send the frame based on available * configuration. This is called by a workqueue and useful for * synchronous 802.15.4 drivers. @@ -152,7 +152,7 @@ enum ieee802154_hw_flags { * * xmit_async: * Handler that 802.15.4 module calls for each transmitted frame. - * skb cntains the buffer starting from the IEEE 802.15.4 header. + * skb contains the buffer starting from the IEEE 802.15.4 header. * The low-level driver should send the frame based on available * configuration. * This function should return zero or negative errno. -- cgit v1.2.3 From 3682c302e72d71abeb17a81a6d29f281b22928e2 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 29 Aug 2024 17:10:50 +0100 Subject: ieee802154: Correct spelling in nl802154.h Correct spelling in nl802154.h. As reported by codespell. 
Signed-off-by: Simon Horman Message-ID: <20240829-wpan-spell-v1-2-799d840e02c4@kernel.org> Signed-off-by: Stefan Schmidt --- include/net/nl802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 4c752f799957..a994dea74596 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -192,7 +192,7 @@ enum nl802154_iftype { * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for * nl802154_wpan_phy_tx_power * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level - * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maxmimum value for cca_ed_level + * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags * @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value -- cgit v1.2.3 From c898f6d7b093bd71e66569cd6797c87d4056f44b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 15:47:30 -0400 Subject: Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once This introduces hci_cmd_sync_run/hci_cmd_sync_run_once which acts like hci_cmd_sync_queue/hci_cmd_sync_queue_once but runs immediately when already on hdev->cmd_sync_work context. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 75e052909b5f..f3052cb252ef 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -73,6 +73,10 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit v1.2.3 From 532f8bcd1c2c4e8112f62e1922fd1703bc0ffce0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 14:37:22 -0400 Subject: Revert "Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE" This reverts commit 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 which breaks compatibility with commands like: bluetoothd[46328]: @ MGMT Command: Load.. 
(0x0013) plen 74 {0x0001} [hci0] Keys: 2 BR/EDR Address: C0:DC:DA:A5:E5:47 (Samsung Electronics Co.,Ltd) Key type: Authenticated key from P-256 (0x03) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 6ed96089bd9765be2f2c971b0b95f624 LE Address: D7:2A:DE:1E:73:A2 (Static) Key type: Unauthenticated key from P-256 (0x02) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 87dd2546ededda380ffcdc0a8faa4597 @ MGMT Event: Command Status (0x0002) plen 3 {0x0001} [hci0] Load Long Term Keys (0x0013) Status: Invalid Parameters (0x0d) Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e449dba698f3..1a32e602630e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -186,7 +186,6 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 type; u8 val[16]; }; @@ -196,7 +195,6 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -211,7 +209,6 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; - u8 link_type; u8 val[16]; }; @@ -219,8 +216,6 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; - u8 bdaddr_type; - u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; -- cgit v1.2.3 From ff95cb5e521b60d046e6571ada697a0977b189c3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:51 +0300 Subject: ipv4: Unmask upper DSCP bits in ip_sock_rt_tos() The function is used to read the DS field that was stored in IPv4 sockets via the IP_TOS socket option so that it could be used to initialize the flowi4_tos field before resolving an output route. Unmask the upper DSCP bits so that in the future the output route lookup could be performed according to the full DSCP value. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/route.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 93833cfe9c96..b896f086ec8e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); + return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK; } struct ip_tunnel_info; -- cgit v1.2.3 From 356d054a4967e6190ee558b8e839fad3e9db35ec Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:52 +0300 Subject: ipv4: Unmask upper DSCP bits in get_rttos() The function is used by a few socket types to retrieve the TOS value with which to perform the FIB lookup for packets sent through the socket (flowi4_tos). If a DS field was passed using the IP_TOS control message, then it is used. Otherwise the one specified via the IP_TOS socket option. Unmask the upper DSCP bits so that in the future the lookup could be performed according to the full DSCP value. 
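To make the masking point concrete, a worked example (values computed here, not taken from the patches), using EF, i.e. DSCP 46:

/*
 *      tos = 46 << 2                      = 0xb8
 *      old: RT_TOS(0xb8) = 0xb8 & 0x1e    = 0x18   (IPTOS_TOS_MASK)
 *      new: 0xb8 & INET_DSCP_MASK (0xfc)  = 0xb8
 *
 * RT_TOS() silently dropped the upper DSCP bits before the FIB lookup;
 * masking with INET_DSCP_MASK keeps the full 6-bit DSCP and still strips
 * the two ECN bits.
 */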
Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/ip.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 1ee472fa8b37..d92d3bc3ec0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,7 @@ #include #include #include +#include #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); + u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos); + + return dsfield & INET_DSCP_MASK; } /* datagram.c */ -- cgit v1.2.3 From b261b2c6c18bcb81d69de011fd991bdfb97259f7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 29 Aug 2024 09:54:54 +0300 Subject: xfrm: Unmask upper DSCP bits in xfrm_get_tos() The function returns a value that is used to initialize 'flowi4_tos' before being passed to the FIB lookup API in the following call chain: xfrm_bundle_create() tos = xfrm_get_tos(fl, family) xfrm_dst_lookup(..., tos, ...) __xfrm_dst_lookup(..., tos, ...) xfrm4_dst_lookup(..., tos, ...) __xfrm4_dst_lookup(..., tos, ...) fl4->flowi4_tos = tos __ip_route_output_key(net, fl4) Unmask the upper DSCP bits so that in the future the output route lookup could be performed according to the full DSCP value. Remove IPTOS_RT_MASK since it is no longer used. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/route.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index b896f086ec8e..1789f1e6640b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -266,8 +266,6 @@ static inline void ip_rt_put(struct rtable *rt) dst_release(&rt->dst); } -#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) - extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) -- cgit v1.2.3 From 0328947c50324cf4b2d8b181bf948edb8101f59f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 28 Aug 2024 21:16:15 +0530 Subject: PCI: endpoint: Assign PCI domain number for endpoint controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, PCI endpoint subsystem doesn't assign PCI domain number for the PCI endpoint controllers. But this domain number could be useful to the EPC drivers to uniquely identify each controller based on the hardware instance when there are multiple ones present in an SoC (even multiple RC/EP). So let's make use of the existing pci_bus_find_domain_nr() API to allocate domain numbers based on either devicetree (linux,pci-domain) property or dynamic domain number allocation scheme. It should be noted that the domain number allocated by this API will be based on both RC and EP controllers in a SoC. If the 'linux,pci-domain' DT property is present, then the domain number represents the actual hardware instance of the PCI endpoint controller. If not, then the domain number will be allocated based on the PCI EP/RC controller probe order. If the architecture doesn't support CONFIG_PCI_DOMAINS_GENERIC (rare), then currently a warning is thrown to indicate that the architecture specific implementation is needed. 
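A sketch of the derivation described above; the exact call site inside the EPC core is an assumption, but pci_bus_find_domain_nr() is the API named in the message and, on devicetree platforms, honours "linux,pci-domain" before falling back to dynamic allocation:

#include <linux/pci.h>

static int example_epc_domain_nr(struct device *dev)
{
#ifdef CONFIG_PCI_DOMAINS_GENERIC
        /* 'dev' is the EP controller's device; no struct pci_bus exists here. */
        return pci_bus_find_domain_nr(NULL, dev);
#else
        /* Architecture-specific support would be needed, as warned above. */
        return 0;
#endif
}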
Link: https://lore.kernel.org/linux-pci/20240828-pci-qcom-hotplug-v4-5-263a385fbbcb@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Reviewed-by: Frank Li --- include/linux/pci-epc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb760..8e3dcac55dcd 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -128,6 +128,7 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number + * @domain_nr: PCI domain number of the endpoint controller * @init_complete: flag to indicate whether the EPC initialization is complete * or not */ @@ -145,6 +146,7 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; + int domain_nr; bool init_complete; }; -- cgit v1.2.3 From cef48236dfe55fa266d505e8a497963a7bc5ef2a Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Thu, 18 Jul 2024 15:06:16 +0800 Subject: NFS: trace: show TIMEDOUT instead of 0x6e __nfs_revalidate_inode may return ETIMEDOUT. print symbol of ETIMEDOUT in nfs trace: before: cat-5191 [005] 119.331127: nfs_revalidate_inode_exit: error=-110 (0x6e) after: cat-1738 [004] 44.365509: nfs_revalidate_inode_exit: error=-110 (TIMEDOUT) Signed-off-by: Chen Hanxiao Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/misc/nfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 7b221d51133a..c82233e950ac 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -51,6 +51,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_IO, "IO" }, \ { NFSERR_NXIO, "NXIO" }, \ { ECHILD, "CHILD" }, \ + { ETIMEDOUT, "TIMEDOUT" }, \ { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ -- cgit v1.2.3 From 4ed9ef32606386efc562bada891d7baa16fc46b4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:14 +1000 Subject: lockd: discard nlmsvc_timeout nlmsvc_timeout always has the same value as (nlm_timeout * HZ), so use that in the one place that nlmsvc_timeout is used. In truth it *might* not always be the same as nlmsvc_timeout is only set when lockd is started while nlm_timeout can be set at anytime via sysctl. I think this difference it not helpful so removing it is good. Also remove the test for nlm_timout being 0. This is not possible - unless a module parameter is used to set the minimum timeout to 0, and if that happens then it probably should be honoured. 
Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1b95fe31051f..61c4b9c41904 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -200,7 +200,7 @@ extern const struct svc_procedure nlmsvc_procedures[24]; extern const struct svc_procedure nlmsvc_procedures4[24]; #endif extern int nlmsvc_grace_period; -extern unsigned long nlmsvc_timeout; +extern unsigned long nlm_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; -- cgit v1.2.3 From f2b27e1d72527f94f030b6356b3187576e60885b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:15 +1000 Subject: SUNRPC: make various functions static, or not exported. Various functions are only used within the sunrpc module, and several are only use in the one file. So clean up: These are marked static, and any EXPORT is removed. svc_rcpb_setup() svc_rqst_alloc() svc_rqst_free() - also moved before first use svc_rpcbind_set_version() svc_drop() - also moved to svc.c These are now not EXPORTed, but are not static. svc_authenticate() svc_sock_update_bufs() Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 9 --------- include/linux/sunrpc/svcauth.h | 1 - include/linux/sunrpc/svcsock.h | 2 -- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a7d0406b9ef5..e4fa25fafa97 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -401,17 +401,13 @@ struct svc_procedure { */ int sunrpc_set_pool_mode(const char *val); int sunrpc_get_pool_mode(char *val, size_t size); -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, int (*threadfn)(void *data)); -struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, - struct svc_pool *pool, int node); bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_release_pages(struct svc_rqst *rqstp); -void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *prog, struct svc_stat *stats, @@ -446,11 +442,6 @@ int svc_generic_rpcbind_set(struct net *net, u32 version, int family, unsigned short proto, unsigned short port); -int svc_rpcbind_set_version(struct net *net, - const struct svc_program *progp, - u32 version, int family, - unsigned short proto, - unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 61c455f1e1f5..63cf6fb26dcc 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -151,7 +151,6 @@ struct auth_ops { struct svc_xprt; -extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7c78ec6356b9..bf45d9e8492a 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -58,8 +58,6 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) */ void 
svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); -void svc_drop(struct svc_rqst *); -void svc_sock_update_bufs(struct svc_serv *serv); int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, char *name_return, const size_t len, const struct cred *cred); -- cgit v1.2.3 From 5fe690a5946442f7f2d08448341dd621367f80bc Mon Sep 17 00:00:00 2001 From: Matthew Cassell Date: Mon, 22 Jul 2024 17:13:16 +0000 Subject: mm: add node_reclaim successes to VM event counters /proc/vmstat currently shows the number of node_reclaim() failures when vm.zone_reclaim_mode is set appropriately. It would be convenient to have the number of successes right next to zone_reclaim_failed (similar to compaction and migration). While this is just a trivial addition to the vmstat file, it was helpful during benchmarking to not have to probe node_reclaim() to observe the success/failure ratio. Link: https://lkml.kernel.org/r/20240722171316.7517-1-mcassell411@gmail.com Signed-off-by: Matthew Cassell Cc: Domenico Cerasuolo Cc: "Huang, Ying" Cc: Li Zhijian Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 747943bc8cc2..6fff8ed3b1fd 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -50,6 +50,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_ANON, PGSTEAL_FILE, #ifdef CONFIG_NUMA + PGSCAN_ZONE_RECLAIM_SUCCESS, PGSCAN_ZONE_RECLAIM_FAILED, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, -- cgit v1.2.3 From 3ddc2fefe6f34ec870fcb22f4cd656e53db079f2 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 22 Jul 2024 18:29:23 +0200 Subject: mm: vmalloc: implement vrealloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Align kvrealloc() with krealloc()", v2. Besides the obvious (and desired) difference between krealloc() and kvrealloc(), there is some inconsistency in their function signatures and behavior: - krealloc() frees the memory when the requested size is zero, whereas kvrealloc() simply returns a pointer to the existing allocation. - krealloc() behaves like kmalloc() if a NULL pointer is passed, whereas kvrealloc() does not accept a NULL pointer at all and, if passed, would fault instead. - krealloc() is self-contained, whereas kvrealloc() relies on the caller to provide the size of the previous allocation. Inconsistent behavior throughout allocation APIs is error prone, hence make kvrealloc() behave like krealloc(), which seems superior in all mentioned aspects. In order to be able to get rid of kvrealloc()'s oldsize parameter, introduce vrealloc() and make use of it in kvrealloc(). Making use of vrealloc() in kvrealloc() also provides opportunities to grow (and shrink) allocations more efficiently. For instance, vrealloc() can be optimized to allocate and map additional pages to grow the allocation or unmap and free unused pages to shrink the allocation. Besides the above, those functions are required by Rust's allocator abstractions [1] (rework based on this series in [2]). With `Vec` or `KVec` respectively, potentially growing (and shrinking) data structures are rather common. [1] https://lore.kernel.org/lkml/20240704170738.3621-1-dakr@redhat.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/dakr/linux.git/log/?h=rust/mm This patch (of 2): Implement vrealloc() analogous to krealloc().
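As an illustrative sketch of the intended calling convention (not taken from this patch, and assuming vrealloc() keeps krealloc()'s failure semantics, i.e. the original allocation is left intact if the resize fails):

	void *new;

	new = vrealloc(buf, new_size, GFP_KERNEL);
	if (!new)
		return -ENOMEM;	/* 'buf' is still valid and owned by the caller */
	buf = new;

The caller no longer has to remember the size of the previous allocation itself.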
Currently, kvrealloc() requires the caller to pass the size of the previous memory allocation, which, instead, should be self-contained. We attempt to fix this in a subsequent patch which, in order to do so, requires vrealloc(). Besides that, we need realloc() functions for kernel allocators in Rust too. With `Vec` or `KVec` respectively, potentially growing (and shrinking) data structures are rather common. [dakr@kernel.org: fix missing nommu implementation] Link: https://lkml.kernel.org/r/20240725141227.13954-1-dakr@kernel.org [dakr@kernel.org: document concurrency restrictions] Link: https://lkml.kernel.org/r/20240725125442.4957-1-dakr@kernel.org [dakr@kernel.org: consider spare memory for __GFP_ZERO] Link: https://lkml.kernel.org/r/20240730185049.6244-3-dakr@kernel.org [dakr@kernel.org: properly document __GFP_ZERO behavior] Link: https://lkml.kernel.org/r/20240730185049.6244-4-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-1-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-2-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Chandan Babu R Cc: Christian König Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Marc Zyngier Cc: Michael Ellerman Cc: Miguel Ojeda Cc: Oliver Upton Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index e4a631ec430b..ad2ce7a6ab7a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -189,6 +189,10 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) +void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); +#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 590b9d576caec6b4c46bba49ed36223a399c3fc5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 22 Jul 2024 18:29:24 +0200 Subject: mm: kvmalloc: align kvrealloc() with krealloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Besides the obvious (and desired) difference between krealloc() and kvrealloc(), there is some inconsistency in their function signatures and behavior: - krealloc() frees the memory when the requested size is zero, whereas kvrealloc() simply returns a pointer to the existing allocation. - krealloc() behaves like kmalloc() if a NULL pointer is passed, whereas kvrealloc() does not accept a NULL pointer at all and, if passed, would fault instead. - krealloc() is self-contained, whereas kvrealloc() relies on the caller to provide the size of the previous allocation. Inconsistent behavior throughout allocation APIs is error prone, hence make kvrealloc() behave like krealloc(), which seems superior in all mentioned aspects. Besides that, implementing kvrealloc() by making use of krealloc() and vrealloc() provides opportunities to grow (and shrink) allocations more efficiently.
For instance, vrealloc() can be optimized to allocate and map additional pages to grow the allocation or unmap and free unused pages to shrink the allocation. [dakr@kernel.org: document concurrency restrictions] Link: https://lkml.kernel.org/r/20240725125442.4957-1-dakr@kernel.org [dakr@kernel.org: disable KASAN when switching to vmalloc] Link: https://lkml.kernel.org/r/20240730185049.6244-2-dakr@kernel.org [dakr@kernel.org: properly document __GFP_ZERO behavior] Link: https://lkml.kernel.org/r/20240730185049.6244-5-dakr@kernel.org Link: https://lkml.kernel.org/r/20240722163111.4766-3-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Chandan Babu R Cc: Christian König Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Marc Zyngier Cc: Michael Ellerman Cc: Miguel Ojeda Cc: Oliver Upton Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..c9cb42203183 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -841,8 +841,8 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) - __realloc_size(3); +void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); #define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) extern void kvfree(const void *addr); -- cgit v1.2.3 From d58a2a581f132529eefac5377676011562b631b8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 22 Jul 2024 13:43:18 +0800 Subject: mm: shmem: rename shmem_is_huge() to shmem_huge_global_enabled() shmem_is_huge() is now used to check if the top-level huge page is enabled, thus rename it to reflect its usage. 
Link: https://lkml.kernel.org/r/da53296e0ab6359aa083561d9dc01e4223d60fbe.1721626645.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1d06b1e5408a..405ee8d3589a 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -111,14 +111,15 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags); +extern bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, bool global_huge); #else -static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) +static __always_inline bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) { return false; } -- cgit v1.2.3 From 6beeab870e70b2d4f49baf6c6be9da1b61c169f8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 22 Jul 2024 13:43:19 +0800 Subject: mm: shmem: move shmem_huge_global_enabled() into shmem_allowable_huge_orders() Move shmem_huge_global_enabled() into shmem_allowable_huge_orders(), so that shmem_allowable_huge_orders() can also help to find the allowable huge orders for tmpfs. Moreover the shmem_huge_global_enabled() can become static. While we are at it, passing the vma instead of mm for shmem_huge_global_enabled() makes code cleaner. No functional changes. 
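For illustration, a caller sketch after this change looks roughly like the following (illustrative only, mirroring the new signature in the hunk below):

	unsigned long orders;

	/* the top-level huge policy check now happens inside the helper */
	orders = shmem_allowable_huge_orders(inode, vma, index, shmem_huge_force);
	if (!orders)
		return 0;	/* no huge orders are allowed for this mapping */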
Link: https://lkml.kernel.org/r/8e825146bb29ee1a1c7bd64d2968ff3e19be7815.1721626645.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 405ee8d3589a..1564d7d3ca61 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -111,21 +111,13 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge); + bool shmem_huge_force); #else -static __always_inline bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, - bool shmem_huge_force, struct mm_struct *mm, - unsigned long vm_flags) -{ - return false; -} static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge) + bool shmem_huge_force) { return 0; } -- cgit v1.2.3 From 4fd568faf6e7e21f206cd66dd4fca9a72e7d922c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 18 Jul 2024 17:18:21 +0800 Subject: mm: kmem: remove mem_cgroup_from_obj() There is no user of mem_cgroup_from_obj(), remove it. Link: https://lkml.kernel.org/r/20240718091821.44740-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0e5bf25d324f..af7da7bd00af 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1717,7 +1717,6 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct mem_cgroup *mem_cgroup_from_obj(void *p); struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); static inline void count_objcg_event(struct obj_cgroup *objcg, @@ -1780,11 +1779,6 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return -1; } -static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) -{ - return NULL; -} - static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) { return NULL; -- cgit v1.2.3 From 2a28713a67fd28eedc13b32300df6b9c5a2381f5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 24 Jul 2024 09:01:14 -0400 Subject: memory tiering: introduce folio_use_access_time() check If memory tiering mode is on and a folio is not in the top tier memory, folio's cpupid field is repurposed to store page access time. Instead of an open coded check, use a function to encapsulate the check. 
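For illustration, the encapsulation amounts to roughly the following sketch (the actual helper body lives outside of 'include' and may differ in detail):

	/* previously open coded at each call site */
	bool folio_use_access_time(struct folio *folio)
	{
		return (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
		       !node_is_toptier(folio_nid(folio));
	}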
Link: https://lkml.kernel.org/r/20240724130115.793641-3-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: "Huang, Ying" Acked-by: David Hildenbrand Reviewed-by: Kefeng Wang Cc: Baolin Wang Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..6c2078ca3391 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1745,6 +1745,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } + +bool folio_use_access_time(struct folio *folio); #else /* !CONFIG_NUMA_BALANCING */ static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { @@ -1798,6 +1800,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) { } +static inline bool folio_use_access_time(struct folio *folio) +{ + return false; +} #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -- cgit v1.2.3 From c4a6fce85640beb4f79e8217272d1ef79839cc17 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 24 Jul 2024 20:33:21 +0000 Subject: vmstat: kernel stack usage histogram As part of the dynamic kernel stack project, we need to know the amount of data that can be saved by reducing the default kernel stack size [1]. Provide a kernel stack usage histogram to aid in optimizing kernel stack sizes and minimizing memory waste in large-scale environments. The histogram divides stack usage into power-of-two buckets and reports the results in /proc/vmstat. This information is especially valuable in environments with millions of machines, where even small optimizations can have a significant impact. The histogram data is presented in /proc/vmstat with entries like "kstack_1k", "kstack_2k", and so on, indicating the number of threads that exited with stack usage falling within each respective bucket. Example outputs: Intel: $ grep kstack /proc/vmstat kstack_1k 3 kstack_2k 188 kstack_4k 11391 kstack_8k 243 kstack_16k 0 ARM with 64K page_size: $ grep kstack /proc/vmstat kstack_1k 1 kstack_2k 340 kstack_4k 25212 kstack_8k 1659 kstack_16k 0 kstack_32k 0 kstack_64k 0 Note: once the dynamic kernel stack is implemented, the usability of this feature will depend on that implementation: On hardware that supports faults on kernel stacks, we will have other metrics that show the total number of pages allocated for stacks. On hardware where faults are not supported, we will most likely have some optimization where only some threads are extended, and for those, these metrics will still be very useful.
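For illustration, the accounting boils down to bucketing an exiting task's stack usage, roughly like the following sketch (the function name and the unguarded buckets are illustrative; the real counters are conditional on THREAD_SIZE as in the hunk below):

	static void kstack_histogram(unsigned long used_stack)
	{
		if (used_stack <= 1024)
			count_vm_event(KSTACK_1K);
		else if (used_stack <= 2048)
			count_vm_event(KSTACK_2K);
		else if (used_stack <= 4096)
			count_vm_event(KSTACK_4K);
		/* ... larger power-of-two buckets elided ... */
		else
			count_vm_event(KSTACK_REST);
	}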
[1] https://lwn.net/Articles/974367 Link: https://lkml.kernel.org/r/20240730150158.832783-3-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240724203322.2765486-3-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Kent Overstreet Acked-by: Shakeel Butt Cc: Domenico Cerasuolo Cc: Li Zhijian Cc: Matthew Wilcox Cc: Nhat Pham Cc: Peter Zijlstra Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6fff8ed3b1fd..aae5c7c5cfb4 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -155,6 +155,30 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, VMA_LOCK_RETRY, VMA_LOCK_MISS, #endif +#ifdef CONFIG_DEBUG_STACK_USAGE + KSTACK_1K, +#if THREAD_SIZE > 1024 + KSTACK_2K, +#endif +#if THREAD_SIZE > 2048 + KSTACK_4K, +#endif +#if THREAD_SIZE > 4096 + KSTACK_8K, +#endif +#if THREAD_SIZE > 8192 + KSTACK_16K, +#endif +#if THREAD_SIZE > 16384 + KSTACK_32K, +#endif +#if THREAD_SIZE > 32768 + KSTACK_64K, +#endif +#if THREAD_SIZE > 65536 + KSTACK_REST, +#endif +#endif /* CONFIG_DEBUG_STACK_USAGE */ NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From fbe76a6557a83af5ef3819fd7b7ffd0a5d3b4e51 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 24 Jul 2024 20:33:22 +0000 Subject: task_stack: uninline stack_not_used Given that stack_not_used() is not performance critical function uninline it. Link: https://lkml.kernel.org/r/20240730150158.832783-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240724203322.2765486-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Shakeel Butt Cc: Domenico Cerasuolo Cc: Kent Overstreet Cc: Li Zhijian Cc: Matthew Wilcox Cc: Nhat Pham Cc: Peter Zijlstra Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/sched/task_stack.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index ccd72b978e1f..bf10bdb487dd 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -95,23 +95,11 @@ static inline int object_is_on_stack(const void *obj) extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE +unsigned long stack_not_used(struct task_struct *p); +#else static inline unsigned long stack_not_used(struct task_struct *p) { - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif + return 0; } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); -- cgit v1.2.3 From f77bd4b14ccfd38dfcfe67eecad517b8ec1b7f37 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:08 +0000 Subject: mm: memcg: don't call propagate_protected_usage() needlessly Patch series "mm: memcg: page counters optimizations", v3. This patchset contains 3 independent small optimizations of page counters. This patch (of 3): Memory protection (min/low) requires a constant tracking of protected memory usage. 
propagate_protected_usage() is called on each page counters update and does a number of operations even in cases when the actual memory protection functionality is not supported (e.g. hugetlb cgroups or memcg swap counters). It's obviously inefficient and leads to a waste of CPU cycles. It can be addressed by calling propagate_protected_usage() only for the counters which do support memory guarantees. As of now it's only memcg->memory - the unified memory memcg counter. Link: https://lkml.kernel.org/r/20240726203110.1577216-2-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 904c52f97284..0b8e993f9163 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -31,6 +31,7 @@ struct page_counter { /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); + bool protection_support; unsigned long min; unsigned long low; unsigned long high; @@ -44,12 +45,17 @@ struct page_counter { #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE) #endif +/* + * Protection is supported only for the first counter (with id 0). + */ static inline void page_counter_init(struct page_counter *counter, - struct page_counter *parent) + struct page_counter *parent, + bool protection_support) { atomic_long_set(&counter->usage, 0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; + counter->protection_support = protection_support; } static inline unsigned long page_counter_read(struct page_counter *counter) -- cgit v1.2.3 From 941ce635234162c420c9185816c59f7d5dd1ad07 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:09 +0000 Subject: mm: page_counters: put page_counter_calculate_protection() under CONFIG_MEMCG Put page_counter_calculate_protection() under CONFIG_MEMCG. The protection functionality (min/low limits) is not supported by any other cgroup subsystem, so page_counter_calculate_protection() and related static effective_protection() can be compiled out if CONFIG_MEMCG is not enabled. 
Link: https://lkml.kernel.org/r/20240726203110.1577216-3-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 0b8e993f9163..1b3e92c09b80 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -87,8 +87,14 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = page_counter_read(counter); } +#ifdef CONFIG_MEMCG void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); +#else +static inline void page_counter_calculate_protection(struct page_counter *root, + struct page_counter *counter, + bool recursive_protection) {} +#endif #endif /* _LINUX_PAGE_COUNTER_H */ -- cgit v1.2.3 From 57979fabff554bf4d1edeeee69251b22ca9bf55e Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Jul 2024 20:31:10 +0000 Subject: mm: page_counters: initialize usage using ATOMIC_LONG_INIT() macro When a page_counter structure is initialized, there is no need to use an atomic set operation to initialize the usage counter because at this point the structure is not visible to anybody else. ATOMIC_LONG_INIT() is what should be used in such cases. Link: https://lkml.kernel.org/r/20240726203110.1577216-4-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 1b3e92c09b80..66ebf9a73158 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -52,7 +52,7 @@ static inline void page_counter_init(struct page_counter *counter, struct page_counter *parent, bool protection_support) { - atomic_long_set(&counter->usage, 0); + counter->usage = (atomic_long_t)ATOMIC_LONG_INIT(0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; -- cgit v1.2.3 From 394290cba9664ed3ab80d0e247402102a9b7287a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jul 2024 17:07:26 +0200 Subject: mm: turn USE_SPLIT_PTE_PTLOCKS / USE_SPLIT_PMD_PTLOCKS into Kconfig options Patch series "mm: split PTE/PMD PT table Kconfig cleanups+clarifications". This series is a follow up to the fixes: "[PATCH v1 0/2] mm/hugetlb: fix hugetlb vs. core-mm PT locking" When working on the fixes, I wondered why 8xx is fine (-> never uses split PT locks) and how PT locking even works properly with PMD page table sharing (-> always requires split PMD PT locks). Let's improve the split PT lock detection, make hugetlb properly depend on it and make 8xx bail out if it would ever get enabled by accident. As an alternative to patch #3 we could extend the Kconfig SPLIT_PTE_PTLOCKS option from patch #2 -- but enforcing it closer to the code that actually implements it feels a bit nicer for documentation purposes, and there is no need to actually disable it because it should always be disabled (!SMP). Did a bunch of cross-compilations to make sure that split PTE/PMD PT locks are still getting used where we would expect them.
[1] https://lkml.kernel.org/r/20240725183955.2268884-1-david@redhat.com This patch (of 3): Let's clean that up a bit and prepare for depending on CONFIG_SPLIT_PMD_PTLOCKS in other Kconfig options. More cleanups would be reasonable (like the arch-specific "depends on" for CONFIG_SPLIT_PTE_PTLOCKS), but we'll leave that for another day. Link: https://lkml.kernel.org/r/20240726150728.3159964-1-david@redhat.com Link: https://lkml.kernel.org/r/20240726150728.3159964-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Russell King (Oracle) Reviewed-by: Qi Zheng Cc: Alexander Viro Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- include/linux/mm_types.h | 2 +- include/linux/mm_types_task.h | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c2078ca3391..b7000fa300f3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2891,7 +2891,7 @@ static inline void pagetable_free(struct ptdesc *pt) __free_pages(page, compound_order(page)); } -#if USE_SPLIT_PTE_PTLOCKS +#if defined(CONFIG_SPLIT_PTE_PTLOCKS) #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); @@ -2949,7 +2949,7 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) return true; } -#else /* !USE_SPLIT_PTE_PTLOCKS */ +#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ @@ -2964,7 +2964,7 @@ static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -#endif /* USE_SPLIT_PTE_PTLOCKS */ +#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { @@ -3024,7 +3024,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? 
\ NULL: pte_offset_kernel(pmd, address)) -#if USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_SPLIT_PMD_PTLOCKS) static inline struct page *pmd_pgtable_page(pmd_t *pmd) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 485424979254..165c58b12ccc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -947,7 +947,7 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index a2f6179b672b..bff5706b76e1 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -16,9 +16,6 @@ #include #endif -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* -- cgit v1.2.3 From 188cac58a8bcdf82c7f63275b68f7a46871e45d6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 26 Jul 2024 17:07:27 +0200 Subject: mm/hugetlb: enforce that PMD PT sharing has split PMD PT locks Sharing page tables between processes but falling back to per-MM page table locks cannot possibly work. So, let's make sure that we do have split PMD locks by adding a new Kconfig option and letting that depend on CONFIG_SPLIT_PMD_PTLOCKS. Link: https://lkml.kernel.org/r/20240726150728.3159964-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Viro Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Cc: Russell King Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 45bf05ad5c53..9b7bcfce6920 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1251,7 +1251,7 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline bool hugetlb_pmd_shared(pte_t *pte) { return page_count(virt_to_page(pte)) > 1; @@ -1287,8 +1287,7 @@ bool __vma_private_lock(struct vm_area_struct *vma); static inline pte_t * hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { -#if defined(CONFIG_HUGETLB_PAGE) && \ - defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) +#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; /* -- cgit v1.2.3 From e5a41fc77771c7c7f6b9bef85a02b38b802b7371 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 29 Jul 2024 20:38:42 +0200 Subject: mm: simplify arch_make_folio_accessible() Patch series "mm: remove arch_make_page_accessible()". Now that s390x implements arch_make_folio_accessible(), let's convert remaining users to use arch_make_folio_accessible() instead so we can remove arch_make_page_accessible(). 
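For illustration, the conversion in the remaining callers has roughly this shape (sketch only; the affected call sites are outside of 'include'):

	/* before: make each page of the folio accessible individually */
	for (i = 0; i < folio_nr_pages(folio); i++) {
		ret = arch_make_page_accessible(folio_page(folio, i));
		if (ret)
			break;
	}

	/* after: one call per folio */
	ret = arch_make_folio_accessible(folio);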
This patch (of 3): Now that s390x implements HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE, let's turn generic arch_make_folio_accessible() into a NOP: there are no other targets that implement HAVE_ARCH_MAKE_PAGE_ACCESSIBLE but not HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE. Link: https://lkml.kernel.org/r/20240729183844.388481-1-david@redhat.com Link: https://lkml.kernel.org/r/20240729183844.388481-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Claudio Imbrenda Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: David Hildenbrand Cc: Heiko Carstens Cc: Janosch Frank Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b7000fa300f3..fc7bd3864bcd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2215,16 +2215,7 @@ static inline int arch_make_page_accessible(struct page *page) #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { - int ret; - long i, nr = folio_nr_pages(folio); - - for (i = 0; i < nr; i++) { - ret = arch_make_page_accessible(folio_page(folio, i)); - if (ret) - break; - } - - return ret; + return 0; } #endif -- cgit v1.2.3 From 3290ef3c7f2a8171fc534e02fb95a512eeae689a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 29 Jul 2024 20:38:44 +0200 Subject: s390/uv: drop arch_make_page_accessible() All code was converted to using arch_make_folio_accessible(), let's drop arch_make_page_accessible(). Link: https://lkml.kernel.org/r/20240729183844.388481-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Claudio Imbrenda Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Janosch Frank Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fc7bd3864bcd..153a4662a0cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2205,13 +2205,6 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) return atomic_read(&folio->_mapcount) > 0; } -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif - #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { -- cgit v1.2.3 From c6f53ed8f213a66ae8bc40aa9112c32412c35a21 Mon Sep 17 00:00:00 2001 From: David Finkel Date: Mon, 29 Jul 2024 10:37:42 -0400 Subject: mm, memcg: cg2 memory{.swap,}.peak write handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm, memcg: cg2 memory{.swap,}.peak write handlers", v7. This patch (of 2): Other mechanisms for querying the peak memory usage of either a process or v1 memory cgroup allow for resetting the high watermark. Restore parity with those mechanisms, but with a less racy API. For example: - Any write to memory.max_usage_in_bytes in a cgroup v1 mount resets the high watermark. - writing "5" to the clear_refs pseudo-file in a processes's proc directory resets the peak RSS. 
This change is an evolution of a previous patch, which mostly copied the cgroup v1 behavior; however, there were concerns about races/ownership issues with a global reset, so instead this change makes the reset file-descriptor-local. Writing any non-empty string to the memory.peak and memory.swap.peak pseudo-files resets the high watermark to the current usage for subsequent reads through that same FD. Notably, following Johannes's suggestion, this implementation moves the O(FDs that have written) behavior onto the FD write(2) path. Instead, on the page-allocation path, we simply add one additional watermark to conditionally bump per-hierarchy level in the page-counter. Additionally, this takes Longman's suggestion of nesting the page-charging-path checks for the two watermarks to reduce the number of common-case comparisons. This behavior is particularly useful for work scheduling systems that need to track memory usage of worker processes/cgroups per-work-item. Since memory can't be squeezed like CPU can (the OOM-killer has opinions), these systems need to track the peak memory usage to compute system/container fullness when binpacking workitems. Most notably, Vimeo's use-case involves a system that's doing global binpacking across many Kubernetes pods/containers, and while we can use PSI for some local decisions about overload, we strive to avoid packing workloads too tightly in the first place. To facilitate this, we track the peak memory usage. However, since we run with long-lived workers (to amortize startup costs), we need a way to track the high watermark while a work-item is executing. Polling runs the risk of missing short spikes that last for timescales below the polling interval, and peak memory tracking at the cgroup level is otherwise perfect for this use-case. As this data is used to ensure that binpacked work ends up with sufficient headroom, this use-case mostly avoids the inaccuracies surrounding reclaimable memory.
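For illustration, the intended usage from a long-lived worker or scheduler looks roughly like this userspace sketch (cgroup path and error handling are illustrative only):

	#include <fcntl.h>
	#include <unistd.h>

	char buf[64] = "";
	int fd = open("/sys/fs/cgroup/worker/memory.peak", O_RDWR);

	write(fd, "reset", 5);			/* any non-empty string resets this FD's watermark */
	/* ... run one work item inside the cgroup ... */
	pread(fd, buf, sizeof(buf) - 1, 0);	/* peak usage observed since the reset above */

A reset through one FD does not disturb the watermark seen through other FDs or the global watermark.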
Link: https://lkml.kernel.org/r/20240730231304.761942-1-davidf@vimeo.com Link: https://lkml.kernel.org/r/20240729143743.34236-1-davidf@vimeo.com Link: https://lkml.kernel.org/r/20240729143743.34236-2-davidf@vimeo.com Signed-off-by: David Finkel Suggested-by: Johannes Weiner Suggested-by: Waiman Long Acked-by: Johannes Weiner Reviewed-by: Michal Koutný Acked-by: Tejun Heo Reviewed-by: Roman Gushchin Cc: Jonathan Corbet Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Shuah Khan Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 5 +++++ include/linux/cgroup.h | 3 +++ include/linux/memcontrol.h | 5 +++++ include/linux/page_counter.h | 11 ++++++++++- 4 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae04035b6cbe..7fc2d0195f56 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -775,6 +775,11 @@ struct cgroup_subsys { extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +struct cgroup_of_peak { + unsigned long value; + struct list_head list; +}; + /** * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c60ba0ab1462..3e0563753cc3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -854,4 +855,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); +struct cgroup_of_peak *of_peak(struct kernfs_open_file *of); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index af7da7bd00af..1b79760af685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -193,6 +193,11 @@ struct mem_cgroup { struct page_counter memsw; /* v1 only */ }; + /* registered local peak watchers */ + struct list_head memory_peaks; + struct list_head swap_peaks; + spinlock_t peaks_lock; + /* Range enforcement for interrupt charges */ struct work_struct high_work; diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 66ebf9a73158..79dbd8bc35a7 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -26,6 +26,8 @@ struct page_counter { atomic_long_t children_low_usage; unsigned long watermark; + /* Latest cg2 reset watermark */ + unsigned long local_watermark; unsigned long failcnt; /* Keep all the read most fields in a separete cacheline. */ @@ -84,7 +86,14 @@ int page_counter_memparse(const char *buf, const char *max, static inline void page_counter_reset_watermark(struct page_counter *counter) { - counter->watermark = page_counter_read(counter); + unsigned long usage = page_counter_read(counter); + + /* + * Update local_watermark first, so it's always <= watermark + * (modulo CPU/compiler re-ordering) + */ + counter->local_watermark = usage; + counter->watermark = usage; } #ifdef CONFIG_MEMCG -- cgit v1.2.3 From a17c7d8fd2b097a422fc892b660e879e19c20214 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:35 +0100 Subject: userfaultfd: move core VMA manipulation logic to mm/userfaultfd.c Patch series "Make core VMA operations internal and testable", v4. There are a number of "core" VMA manipulation functions implemented in mm/mmap.c, notably those concerning VMA merging, splitting, modifying, expanding and shrinking, which logically don't belong there. 
More importantly this functionality represents an internal implementation detail of memory management and should not be exposed outside of mm/ itself. This patch series isolates core VMA manipulation functionality into its own file, mm/vma.c, and provides an API to the rest of the mm code in mm/vma.h. Importantly, it also carefully implements mm/vma_internal.h, which specifies which headers need to be imported by vma.c, leading to the very useful property that vma.c depends only on mm/vma.h and mm/vma_internal.h. This means we can then re-implement vma_internal.h in userland, adding shims for kernel mechanisms as required, allowing us to unit test internal VMA functionality. This testing is useful as opposed to an e.g. kunit implementation as this way we can avoid all external kernel side-effects while testing, run tests VERY quickly, and iterate on and debug problems quickly. Excitingly this opens the door to, in the future, recreating precise problems observed in production in userland and very quickly debugging problems that might otherwise be very difficult to reproduce. This patch series takes advantage of existing shim logic and full userland maple tree support contained in tools/testing/radix-tree/ and tools/include/linux/, separating out shared components of the radix tree implementation to provide this testing. Kernel functionality is stubbed and shimmed as needed in tools/testing/vma/ which contains a fully functional userland vma_internal.h file and which imports mm/vma.c and mm/vma.h to be directly tested from userland. A simple, skeleton testing implementation is provided in tools/testing/vma/vma.c as a proof-of-concept, asserting that simple VMA merge, modify (testing split), expand and shrink functionality work correctly. This patch (of 4): This patch forms part of a patch series intending to separate out VMA logic and render it testable from userspace, which requires that core manipulation functions be exposed in an mm/-internal header file. In order to do this, we must abstract APIs we wish to test, in this instance functions which ultimately invoke vma_modify(). This patch therefore moves all logic which ultimately invokes vma_modify() to mm/userfaultfd.c, trying to transfer code at a functional granularity where possible. [lorenzo.stoakes@oracle.com: fix user-after-free in userfaultfd_clear_vma()] Link: https://lkml.kernel.org/r/3c947ddc-b804-49b7-8fe9-3ea3ca13def5@lucifer.local Link: https://lkml.kernel.org/r/cover.1722251717.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/50c3ed995fd81c45876c86304c8a00bf3e396cfd.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. 
Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a12bcf042551..9fc6ce15c499 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -267,6 +267,25 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); +void userfaultfd_reset_ctx(struct vm_area_struct *vma); + +struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end); + +int userfaultfd_register_range(struct userfaultfd_ctx *ctx, + struct vm_area_struct *vma, + unsigned long vm_flags, + unsigned long start, unsigned long end, + bool wp_async); + +void userfaultfd_release_new(struct userfaultfd_ctx *ctx); + +void userfaultfd_release_all(struct mm_struct *mm, + struct userfaultfd_ctx *ctx); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ -- cgit v1.2.3 From fa04c08f3ce649b47781c7663e92398197f5b551 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:36 +0100 Subject: mm: move vma_modify() and helpers to internal header These are core VMA manipulation functions which invoke VMA splitting and merging and should not be directly accessed from outside of mm/. Link: https://lkml.kernel.org/r/5efde0c6342a8860d5ffc90b415f3989fd8ed0b2.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 60 ------------------------------------------------------ 1 file changed, 60 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 153a4662a0cd..c4054e68dc09 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3279,66 +3279,6 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); -struct vm_area_struct *vma_modify(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long vm_flags, - struct mempolicy *policy, - struct vm_userfaultfd_ctx uffd_ctx, - struct anon_vma_name *anon_name); - -/* We are about to modify the VMA's flags. */ -static inline struct vm_area_struct -*vma_modify_flags(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, - anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or anon_name. 
*/ -static inline struct vm_area_struct -*vma_modify_flags_name(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long new_flags, - struct anon_vma_name *new_name) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); -} - -/* We are about to modify the VMA's memory policy. */ -static inline struct vm_area_struct -*vma_modify_policy(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct mempolicy *new_pol) -{ - return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, - new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or uffd context. */ -static inline struct vm_area_struct -*vma_modify_flags_uffd(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), new_ctx, anon_vma_name(vma)); -} static inline int check_data_rlimit(unsigned long rlim, unsigned long new, -- cgit v1.2.3 From d61f0d59683d9c899211c300254d4140c482a6c0 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:37 +0100 Subject: mm: move vma_shrink(), vma_expand() to internal header The vma_shrink() and vma_expand() functions are internal VMA manipulation functions which we ought to abstract for use outside of memory management code. To achieve this, we replace shift_arg_pages() in fs/exec.c with an invocation of a new relocate_vma_down() function implemented in mm/mmap.c, which enables us to also move move_page_tables() and vma_iter_prev_range() to internal.h. The purpose of doing this is to isolate key VMA manipulation functions in order that we can both abstract them and later render them easily testable. Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4054e68dc09..1c0d88446eb8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) return mas_prev(&vmi->mas, 0); } -static inline -struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) -{ - return mas_prev_range(&vmi->mas, 0); -} - static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) { return vmi->mas.index; @@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -extern unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack); - /* * Flags used by change_protection(). 
For now we make it a bitmap so * that we can pass in multiple flags just like parameters. However @@ -3267,11 +3256,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff, - struct vm_area_struct *next); -extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); @@ -3279,6 +3263,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, -- cgit v1.2.3 From 49b1b8d6f6831026cb105b0eafa18f13db612d86 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 29 Jul 2024 12:50:38 +0100 Subject: mm: move internal core VMA manipulation functions to own file This patch introduces vma.c and moves internal core VMA manipulation functions to this file from mmap.c. This allows us to isolate VMA functionality in a single place such that we can create userspace testing code that invokes this functionality in an environment where we can implement simple unit tests of core functionality. This patch ensures that core VMA functionality is explicitly marked as such by its presence in mm/vma.h. It also places the header includes required by vma.c in vma_internal.h, which is simply imported by vma.c. This makes the VMA functionality testable, as userland testing code can simply stub out functionality as required. Link: https://lkml.kernel.org/r/c77a6aafb4c42aaadb8e7271a853658cbdca2e22.1722251717.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Brendan Higgins Cc: Christian Brauner Cc: David Gow Cc: Eric W. 
Biederman Cc: Jan Kara Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Rae Moar Cc: SeongJae Park Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Pengfei Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1c0d88446eb8..bd219ac9c026 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,21 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) return mas_prev(&vmi->mas, 0); } -static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) -{ - return vmi->mas.index; -} - -static inline unsigned long vma_iter_end(struct vma_iterator *vmi) -{ - return vmi->mas.last + 1; -} -static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, - unsigned long count) -{ - return mas_expected_entries(&vmi->mas, count); -} - static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, unsigned long start, unsigned long end, gfp_t gfp) { @@ -2534,21 +2519,6 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) -bool vma_needs_dirty_tracking(struct vm_area_struct *vma); -bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); -static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) -{ - /* - * We want to check manually if we can change individual PTEs writable - * if we can't do that automatically for all PTEs in a mapping. For - * private mappings, that's always the case when we have write - * permissions as we properly have to handle COW. - */ - if (vma->vm_flags & VM_SHARED) - return vma_wants_writenotify(vma, vma->vm_page_prot); - return !!(vma->vm_flags & VM_WRITE); - -} bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern long change_protection(struct mmu_gather *tlb, @@ -3256,12 +3226,7 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void unlink_file_vma(struct vm_area_struct *); -extern struct vm_area_struct *copy_vma(struct vm_area_struct **, - unsigned long addr, unsigned long len, pgoff_t pgoff, - bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); -- cgit v1.2.3 From 29943248af0a8ac3edb808564baa417b40e35521 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 29 Jul 2024 14:47:17 +0530 Subject: mm: improve code consistency with zonelist_* helper functions Replace direct access to zoneref->zone, zoneref->zone_idx, or zone_to_nid(zoneref->zone) with the corresponding zonelist_* helper functions for consistency. No functional change. 
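For illustration, the preferred style after this change is roughly the following sketch, mirroring the hunks below:

	struct zoneref *z = first_zones_zonelist(zonelist, highest_zoneidx, nodemask);

	/* use the accessors rather than poking at zoneref fields directly */
	struct zone *zone = zonelist_zone(z);
	int nid = zonelist_node_idx(z);
	enum zone_type idx = zonelist_zone_idx(z);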
Link: https://lkml.kernel.org/r/20240729091717.464-1-shivankg@amd.com Co-developed-by: Shivank Garg Signed-off-by: Shivank Garg Signed-off-by: Wei Yang Acked-by: David Hildenbrand Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 4 ++-- include/trace/events/oom.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1dc6248feb83..dbb69f96a152 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1688,7 +1688,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ - for (zone = z->zone; \ + for (zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) @@ -1724,7 +1724,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes) nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); - return (!z->zone) ? true : false; + return (!zonelist_zone(z)) ? true : false; } diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index a42be4c8563b..9f0a5d1482c4 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -55,8 +55,8 @@ TRACE_EVENT(reclaim_retry_zone, ), TP_fast_assign( - __entry->node = zone_to_nid(zoneref->zone); - __entry->zone_idx = zoneref->zone_idx; + __entry->node = zonelist_node_idx(zoneref); + __entry->zone_idx = zonelist_zone_idx(zoneref); __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; -- cgit v1.2.3 From 9f101bef408a3f70c44b6e4de44d3d4e2655ed10 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 30 Jul 2024 19:13:39 +1200 Subject: mm: swap: add nr argument in swapcache_prepare and swapcache_clear to support large folios Right now, swapcache_prepare() and swapcache_clear() supports one entry only, to support large folios, we need to handle multiple swap entries. To optimize stack usage, we iterate twice in __swap_duplicate(): the first time to verify that all entries are valid, and the second time to apply the modifications to the entries. Currently, we're using nr=1 for the existing users. 
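For illustration, a caller handling a large folio now looks roughly like this sketch (error handling elided; swapcache_clear() gains a matching nr parameter in the mm-internal headers, per the subject line):

	int nr = folio_nr_pages(folio);

	/* reserve all of the folio's swap entries in one call */
	if (swapcache_prepare(entry, nr)) {
		/* at least one entry is not usable; fall back or retry */
	}

	/* ... and release them together when done */
	swapcache_clear(si, entry, nr);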
[v-songbaohua@oppo.com: clarify swap_count_continued and improve readability for __swap_duplicate] Link: https://lkml.kernel.org/r/20240802071817.47081-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240730071339.107447-2-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Tested-by: Baolin Wang Cc: Chris Li Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Nhat Pham Cc: Ryan Roberts Cc: Sergey Senozhatsky Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Yang Shi Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index ba7ea95d1c57..5b920fa2315b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -480,7 +480,7 @@ extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); -extern int swapcache_prepare(swp_entry_t); +extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); @@ -554,7 +554,7 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } -static inline int swapcache_prepare(swp_entry_t swp) +static inline int swapcache_prepare(swp_entry_t swp, int nr) { return 0; } -- cgit v1.2.3 From 94ccd21e9a5f41585bd16cc84dab2afa6cc21149 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 31 Jul 2024 16:20:00 +0200 Subject: mm/hugetlb: remove hugetlb_follow_page_mask() leftover We removed hugetlb_follow_page_mask() in commit 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") but forgot to cleanup some leftovers. While at it, simplify the hugetlb comment, it's overly detailed and rather confusing. Stating that we may end up in there during coredumping is sufficient to explain the PF_DUMPCORE usage. 
Link: https://lkml.kernel.org/r/20240731142000.625044-1-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Peter Xu Cc: Muchun Song Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 9b7bcfce6920..3100a52ceb73 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -127,9 +127,6 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); -struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned int *page_mask); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -- cgit v1.2.3 From 17d5f38b33b66210c5a46af639c47fddfe1c5b4d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 31 Jul 2024 18:07:58 +0200 Subject: mm: clarify folio_likely_mapped_shared() documentation for KSM folios For KSM folios, the function actually does what it is supposed to do: even having multiple mappings inside the same MM is considered "sharing", as there is no real relationship between these KSM page mappings -- in contrast to mapping the same file range twice and having the same pagecache page mapped twice. Link: https://lkml.kernel.org/r/20240731160758.808925-1-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bd219ac9c026..2f6c08b53e4f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2132,14 +2132,19 @@ static inline size_t folio_size(const struct folio *folio) * MM ("mapped shared"), or if the folio is only mapped into a single MM * ("mapped exclusively"). * + * For KSM folios, this function also returns "mapped shared" when a folio is + * mapped multiple times into the same MM, because the individual page mappings + * are independent. + * * As precise information is not easily available for all folios, this function * estimates the number of MMs ("sharers") that are currently mapping a folio * using the number of times the first page of the folio is currently mapped * into page tables. * - * For small anonymous folios (except KSM folios) and anonymous hugetlb folios, - * the return value will be exactly correct, because they can only be mapped - * at most once into an MM, and they cannot be partially mapped. + * For small anonymous folios and anonymous hugetlb folios, the return + * value will be exactly correct: non-KSM folios can only be mapped at most once + * into an MM, and they cannot be partially mapped. KSM folios are + * considered shared even if mapped multiple times into the same MM. * * For other folios, the result can be fuzzy: * #. For partially-mappable large folios (THP), the return value can wrongly @@ -2148,9 +2153,6 @@ static inline size_t folio_size(const struct folio *folio) * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. - * #. 
For (small) KSM folios, the return value can wrongly indicate "mapped - * shared" (false positive), when the folio is mapped multiple times into - * the same MM. * * Further, this function only considers current page table mappings that * are tracked using the folio mapcount(s). -- cgit v1.2.3 From 03790c51a475c46647079e67e2460a149938bfd6 Mon Sep 17 00:00:00 2001 From: Kaiyang Zhao Date: Thu, 1 Aug 2024 23:25:47 +0000 Subject: mm: create promo_wmark_pages and clean up open-coded sites Patch series "mm: print the promo watermark in zoneinfo", v2. This patch (of 2): Define promo_wmark_pages and convert current call sites of wmark_pages with fixed WMARK_PROMO to using it instead. Link: https://lkml.kernel.org/r/20240801232548.36604-1-kaiyang2@cs.cmu.edu Link: https://lkml.kernel.org/r/20240801232548.36604-2-kaiyang2@cs.cmu.edu Signed-off-by: Kaiyang Zhao Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dbb69f96a152..9f3589f7f05c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -669,6 +669,7 @@ enum zone_watermarks { #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) +#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) /* -- cgit v1.2.3 From 620943d7ee69070df8844235d58843af48ac70e2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 1 Aug 2024 16:50:05 -0700 Subject: include/linux/mmzone.h: clean up watermark accessors - we have a helper wmark_pages(). Teach min_wmark_pages(), low_wmark_pages(), high_wmark_pages() and promo_wmark_pages() to use it instead of open-coding its implementation. - there's no reason to implement all these things as macros. Redo them in C. Acked-by: Johannes Weiner Cc: Kaiyang Zhao Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f3589f7f05c..17506e4a2835 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -666,12 +666,6 @@ enum zone_watermarks { #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) -#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) -#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) -#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) -#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost) -#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) - /* * Flags used in pcp->flags field. * @@ -1017,6 +1011,32 @@ enum zone_flags { ZONE_BELOW_HIGH, /* zone is below high watermark. 
*/ }; +static inline unsigned long wmark_pages(const struct zone *z, + enum zone_watermarks w) +{ + return z->_watermark[w] + z->watermark_boost; +} + +static inline unsigned long min_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_MIN); +} + +static inline unsigned long low_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_LOW); +} + +static inline unsigned long high_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_HIGH); +} + +static inline unsigned long promo_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_PROMO); +} + static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); -- cgit v1.2.3 From aa39ca6940f1a0540f2984051b3089972f42959b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:15 +0200 Subject: mm/pagewalk: introduce folio_walk_start() + folio_walk_end() We want to get rid of follow_page(), and have a more reasonable way to just lookup a folio mapped at a certain address, perform some checks while still under PTL, and then only conditionally grab a folio reference if really required. Further, we might want to get rid of some walk_page_range*() users that really only want to temporarily lookup a single folio at a single address. So let's add a new page table walker that does exactly that, similarly to GUP also being able to walk hugetlb VMAs. Add folio_walk_end() as a macro for now: the compiler is not easy to please with the pte_unmap()->kunmap_local(). Note that one difference between follow_page() and get_user_pages(1) is that follow_page() will not trigger faults to get something mapped. So folio_walk is at least currently not a replacement for get_user_pages(1), but could likely be extended/reused to achieve something similar in the future. Link: https://lkml.kernel.org/r/20240802155524.517137-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 27cd1e59ccf7..f5eb5a32aeed 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -130,4 +130,62 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, pgoff_t nr, const struct mm_walk_ops *ops, void *private); +typedef int __bitwise folio_walk_flags_t; + +/* + * Walk migration entries as well. Careful: a large folio might get split + * concurrently. + */ +#define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) + +/* Walk shared zeropages (small + huge) as well. */ +#define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) + +enum folio_walk_level { + FW_LEVEL_PTE, + FW_LEVEL_PMD, + FW_LEVEL_PUD, +}; + +/** + * struct folio_walk - folio_walk_start() / folio_walk_end() data + * @page: exact folio page referenced (if applicable) + * @level: page table level identifying the entry type + * @pte: pointer to the page table entry (FW_LEVEL_PTE). + * @pmd: pointer to the page table entry (FW_LEVEL_PMD). + * @pud: pointer to the page table entry (FW_LEVEL_PUD). + * @ptl: pointer to the page table lock. 
+ * + * (see folio_walk_start() documentation for more details) + */ +struct folio_walk { + /* public */ + struct page *page; + enum folio_walk_level level; + union { + pte_t *ptep; + pud_t *pudp; + pmd_t *pmdp; + }; + union { + pte_t pte; + pud_t pud; + pmd_t pmd; + }; + /* private */ + struct vm_area_struct *vma; + spinlock_t *ptl; +}; + +struct folio *folio_walk_start(struct folio_walk *fw, + struct vm_area_struct *vma, unsigned long addr, + folio_walk_flags_t flags); + +#define folio_walk_end(__fw, __vma) do { \ + spin_unlock((__fw)->ptl); \ + if (likely((__fw)->level == FW_LEVEL_PTE)) \ + pte_unmap((__fw)->ptep); \ + vma_pgtable_walk_end(__vma); \ +} while (0) + #endif /* _LINUX_PAGEWALK_H */ -- cgit v1.2.3 From 8710f6ed34e7bcf98971d65acfb5eaed5fc469b0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:20 +0200 Subject: mm/huge_memory: convert split_huge_pages_pid() from follow_page() to folio_walk Let's remove yet another follow_page() user. Note that we have to do the split without holding the PTL, after folio_walk_end(). We don't care about losing the secretmem check in follow_page(). [david@redhat.com: teach can_split_folio() that we are not holding an additional reference] Link: https://lkml.kernel.org/r/c75d1c6c-8ea6-424f-853c-1ccda6c77ba2@redhat.com Link: https://lkml.kernel.org/r/20240802155524.517137-8-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e25d9ebfdf89..ce44caa40eed 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -314,7 +314,7 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); -bool can_split_folio(struct folio *folio, int *pextra_pins); +bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); static inline int split_huge_page(struct page *page) @@ -470,7 +470,7 @@ thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, } static inline bool -can_split_folio(struct folio *folio, int *pextra_pins) +can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) { return false; } -- cgit v1.2.3 From 7290840de65e04c1fc59dab692468fc9600c6038 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Aug 2024 17:55:23 +0200 Subject: mm: remove follow_page() All users are gone, let's remove it and any leftovers in comments. We'll leave any FOLL/follow_page_() naming cleanups as future work. 
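A minimal sketch of the folio_walk pattern that such conversions use (the wrapper is hypothetical and assumes the caller already holds the mmap lock; folio_walk_start()/folio_walk_end() are the interfaces added earlier in this series):

	/* Sketch: look up the folio mapped at addr, take a reference, drop the PTL. */
	static struct folio *lookup_mapped_folio(struct vm_area_struct *vma,
						 unsigned long addr)
	{
		struct folio_walk fw;
		struct folio *folio;

		folio = folio_walk_start(&fw, vma, addr, 0);
		if (!folio)
			return NULL;
		/* Still under the page table lock here: do checks, then pin. */
		folio_get(folio);
		folio_walk_end(&fw, vma);
		return folio;
	}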
Link: https://lkml.kernel.org/r/20240802155524.517137-11-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Janosch Frank Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f6c08b53e4f..ee8cea73d415 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3527,9 +3527,6 @@ static inline vm_fault_t vmf_fs_error(int err) return VM_FAULT_SIGBUS; } -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int foll_flags); - static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) -- cgit v1.2.3 From e31c38e037621c445bb4393fd77e0a76e6e0899a Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 5 Aug 2024 16:22:42 -0700 Subject: zswap: implement a second chance algorithm for dynamic zswap shrinker Patch series "improving dynamic zswap shrinker protection scheme", v3. When experimenting with the memory-pressure based (i.e "dynamic") zswap shrinker in production, we observed a sharp increase in the number of swapins, which led to performance regression. We were able to trace this regression to the following problems with the shrinker's warm pages protection scheme: 1. The protection decays way too rapidly, and the decaying is coupled with zswap stores, leading to anomalous patterns, in which a small batch of zswap stores effectively erase all the protection in place for the warmer pages in the zswap LRU. This observation has also been corroborated upstream by Takero Funaki (in [1]). 2. We inaccurately track the number of swapped in pages, missing the non-pivot pages that are part of the readahead window, while counting the pages that are found in the zswap pool. To alleviate these two issues, this patch series improve the dynamic zswap shrinker in the following manner: 1. Replace the protection size tracking scheme with a second chance algorithm. This new scheme removes the need for haphazard stats decaying, and automatically adjusts the pace of pages aging with memory pressure, and writeback rate with pool activities: slowing down when the pool is dominated with zswpouts, and speeding up when the pool is dominated with stale entries. 2. Fix the tracking of the number of swapins to take into account non-pivot pages in the readahead window. With these two changes in place, in a kernel-building benchmark without any cold data added, the number of swapins is reduced by 64.12%. This translate to a 10.32% reduction in build time. We also observe a 3% reduction in kernel CPU time. In another benchmark, with cold data added (to gauge the new algorithm's ability to offload cold data), the new second chance scheme outperforms the old protection scheme by around 0.7%, and actually written back around 21% more pages to backing swap device. So the new scheme is just as good, if not even better than the old scheme on this front as well. [1]: https://lore.kernel.org/linux-mm/CAPpodddcGsK=0Xczfuk8usgZ47xeyf4ZjiofdT+ujiyz6V2pFQ@mail.gmail.com/ This patch (of 2): Current zswap shrinker's heuristics to prevent overshrinking is brittle and inaccurate, specifically in the way we decay the protection size (i.e making pages in the zswap LRU eligible for reclaim). 
We currently decay protection aggressively in zswap_lru_add() calls. This leads to the following unfortunate effect: when a new batch of pages enter zswap, the protection size rapidly decays to below 25% of the zswap LRU size, which is way too low. We have observed this effect in production, when experimenting with the zswap shrinker: the rate of shrinking shoots up massively right after a new batch of zswap stores. This is somewhat the opposite of what we want originally - when new pages enter zswap, we want to protect both these new pages AND the pages that are already protected in the zswap LRU. Replace the existing heuristics with a second chance algorithm: 1. When a new zswap entry is stored in the zswap pool, its referenced bit is set. 2. When the zswap shrinker encounters a zswap entry with the referenced bit set, give it a second chance - only flip the referenced bit and rotate it in the LRU. 3. If the shrinker encounters the entry again, this time with its referenced bit unset, then it can reclaim the entry. In this manner, the aging of the pages in the zswap LRUs is decoupled from zswap stores, and picks up the pace with increasing memory pressure (which is what we want). The second chance scheme allows us to modulate the writeback rate based on recent pool activities. Entries that recently entered the pool will be protected, so if the pool is dominated by such entries the writeback rate will reduce proportionally, protecting the workload's workingset. On the other hand, stale entries will be written back quickly, which increases the effective writeback rate. The referenced bit is added at the hole after the `length` field of struct zswap_entry, so there is no extra space overhead for this algorithm. We will still maintain the count of swapins, which is consumed and subtracted from the lru size in zswap_shrinker_count(), to further penalize past overshrinking that led to disk swapins. The idea is that had we considered this many more pages in the LRU active/protected, they would not have been written back and we would not have had to swap them in. To test the new heuristics, I built the kernel under a cgroup with memory.max set to 2G, on a host with 36 cores: With the old shrinker: real: 263.89s user: 4318.11s sys: 673.29s swapins: 227300.5 With the second chance algorithm: real: 244.85s user: 4327.22s sys: 664.39s swapins: 94663 (average over 5 runs) We observe a 1.3% reduction in kernel CPU usage, and around 7.2% reduction in real time. Note that the number of swapped in pages dropped by 58%. [nphamcs@gmail.com: fix a small mistake in the referenced bit documentation] Link: https://lkml.kernel.org/r/20240806003403.3142387-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20240805232243.2896283-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20240805232243.2896283-2-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Johannes Weiner Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Shakeel Butt Cc: Takero Funaki Signed-off-by: Andrew Morton --- include/linux/zswap.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 6cecb4a4f68b..9cd1beef0654 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -13,17 +13,15 @@ extern atomic_t zswap_stored_pages; struct zswap_lruvec_state { /* - * Number of pages in zswap that should be protected from the shrinker.
- * This number is an estimate of the following counts: + * Number of swapped in pages from disk, i.e not found in the zswap pool. * - * a) Recent page faults. - * b) Recent insertion to the zswap LRU. This includes new zswap stores, - * as well as recent zswap LRU rotations. - * - * These pages are likely to be warm, and might incur IO if the are written - * to swap. + * This is consumed and subtracted from the lru size in + * zswap_shrinker_count() to penalize past overshrinking that led to disk + * swapins. The idea is that had we considered this many more pages in the + * LRU active/protected and not written them back, we would not have had to + * swapped them in. */ - atomic_long_t nr_zswap_protected; + atomic_long_t nr_disk_swapins; }; unsigned long zswap_total_pages(void); -- cgit v1.2.3 From 17fe833b0de08a78bfb51388e0615969d73ea8ad Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Aug 2024 14:52:03 +0200 Subject: mm: fix (harmless) type confusion in lock_vma_under_rcu() There is a (harmless) type confusion in lock_vma_under_rcu(): After vma_start_read(), we have taken the VMA lock but don't know yet whether the VMA has already been detached and scheduled for RCU freeing. At this point, ->vm_start and ->vm_end are accessed. vm_area_struct contains a union such that ->vm_rcu uses the same memory as ->vm_start and ->vm_end; so accessing ->vm_start and ->vm_end of a detached VMA is illegal and leads to type confusion between union members. Fix it by reordering the vma->detached check above the address checks, and document the rules for RCU readers accessing VMAs. This will probably change the number of observed VMA_LOCK_MISS events (since previously, trying to access a detached VMA whose ->vm_rcu has been scheduled would bail out when checking the fault address against the rcu_head members reinterpreted as VMA bounds). Link: https://lkml.kernel.org/r/20240805-fix-vma-lock-type-confusion-v1-1-9f25443a9a71@google.com Fixes: 50ee32537206 ("mm: introduce lock_vma_under_rcu to be used from arch-specific code") Signed-off-by: Jann Horn Acked-by: Suren Baghdasaryan Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 165c58b12ccc..003619fab20e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -660,6 +660,9 @@ struct vma_numab_state { * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). + * + * Only explicitly marked struct members may be accessed by RCU readers before + * getting a stable reference. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -675,7 +678,11 @@ struct vm_area_struct { #endif }; - struct mm_struct *vm_mm; /* The address space we belong to. */ + /* + * The address space we belong to. + * Unstable RCU readers are allowed to read this. + */ + struct mm_struct *vm_mm; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ /* @@ -688,7 +695,10 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* Flag to indicate areas detached from the mm->mm_mt tree */ + /* + * Flag to indicate areas detached from the mm->mm_mt tree. + * Unstable RCU readers are allowed to read this. + */ bool detached; /* @@ -706,6 +716,7 @@ struct vm_area_struct { * slowpath. 
*/ int vm_lock_seq; + /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif -- cgit v1.2.3 From cc0a0f98553528791ae33ba5ee8c118b52ae2028 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Aug 2024 14:39:39 +0200 Subject: kfence: introduce burst mode Introduce burst mode, which can be configured with kfence.burst=$count, where the burst count denotes the additional successive slab allocations to be allocated through KFENCE for each sample interval. The idea is that this can give developers an additional knob to make KFENCE more aggressive when debugging specific issues of systems where either rebooting or recompiling the kernel with KASAN is not possible. Experiment: To assess the effectiveness of the new option, we randomly picked a recent out-of-bounds [1] and use-after-free bug [2], each with a reproducer provided by syzbot, that initially detected these bugs with KASAN. We then tried to reproduce the bugs with KFENCE below. [1] Fixed by: 7c55b78818cf ("jfs: xattr: fix buffer overflow for invalid xattr") https://syzkaller.appspot.com/bug?id=9d1b59d4718239da6f6069d3891863c25f9f24a2 [2] Fixed by: f8ad00f3fb2a ("l2tp: fix possible UAF when cleaning up tunnels") https://syzkaller.appspot.com/bug?id=4f34adc84f4a3b080187c390eeef60611fd450e1 The following KFENCE configs were compared. A pool size of 1023 objects was used for all configurations. Baseline kfence.sample_interval=100 kfence.skip_covered_thresh=75 kfence.burst=0 Aggressive kfence.sample_interval=1 kfence.skip_covered_thresh=10 kfence.burst=0 AggressiveBurst kfence.sample_interval=1 kfence.skip_covered_thresh=10 kfence.burst=1000 Each reproducer was run 10 times (after a fresh reboot), with the following detection counts for each KFENCE config: | Detection Count out of 10 | | OOB [1] | UAF [2] | ------------------+-------------+-------------+ Default | 0/10 | 0/10 | Aggressive | 0/10 | 0/10 | AggressiveBurst | 8/10 | 8/10 | With the Default and even the Aggressive configs the results are unsurprising, given KFENCE has not been designed for deterministic bug detection of small test cases. However, when enabling burst mode with relatively large burst count, KFENCE can start to detect heap memory-safety bugs even in simpler test cases with high probability (in the above cases with ~80% probability). Link: https://lkml.kernel.org/r/20240805124203.2692278-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Jann Horn Signed-off-by: Andrew Morton --- include/linux/kfence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 88100cc9caba..0ad1ddbb8b99 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -124,7 +124,7 @@ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp if (!static_branch_likely(&kfence_allocation_key)) return NULL; #endif - if (likely(atomic_read(&kfence_allocation_gate))) + if (likely(atomic_read(&kfence_allocation_gate) > 0)) return NULL; return __kfence_alloc(s, size, flags); } -- cgit v1.2.3 From 47baed6a132f611228113851a70c73f6e6478d87 Mon Sep 17 00:00:00 2001 From: Jianhui Zhou <912460177@qq.com> Date: Wed, 7 Aug 2024 15:44:48 +0800 Subject: percpu: remove pcpu_alloc_size() pcpu_alloc_size() was added in 7ac5c53e0073 "mm/percpu.c: introduce pcpu_alloc_size()", which is used to get the allocated memory size in bpf. 
However, pcpu_alloc_size() is no longer used in "bpf: Use c->unit_size to select target cache during free" because its actual allocated memory size may change at runtime due to its slab merging mechanism. Therefore, pcpu_alloc_size() can be removed. Link: https://lkml.kernel.org/r/tencent_AD5C50E8D78C07A3CE539BD5F6BF39706507@qq.com Signed-off-by: Jianhui Zhou <912460177@qq.com> Cc: Christoph Lameter Cc: Dennis Zhou Cc: JonasZhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4b2047b78b67..b6321fc49159 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -135,7 +135,6 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, gfp_t gfp) __alloc_size(1); -extern size_t pcpu_alloc_size(void __percpu *__pdata); #define __alloc_percpu_gfp(_size, _align, _gfp) \ alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp)) -- cgit v1.2.3 From 09022bc196d23484a7a5d48cf373f8583e3fcf23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Aug 2024 20:35:26 +0100 Subject: mm: remove PG_error The PG_error bit is now unused; delete it and free up a bit in page->flags. Link: https://lkml.kernel.org/r/20240807193528.1865100-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +----- include/trace/events/mmflags.h | 1 - include/uapi/linux/kernel-page-flags.h | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5769fe6e4950..a0a29bd092f8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -66,8 +66,6 @@ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * - * PG_error is set to indicate that an I/O error occurred on this page. - * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. @@ -103,7 +101,6 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_error, PG_owner_priv_1, /* Owner use.
If pagecache, fs may use*/ PG_arch_1, PG_reserved, @@ -183,7 +180,7 @@ enum pageflags { */ /* At least one page in this folio has the hwpoison flag set */ - PG_has_hwpoisoned = PG_error, + PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -506,7 +503,6 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) -PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..b5c4da370a50 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -100,7 +100,6 @@ #define __def_pageflag_names \ DEF_PAGEFLAG_NAME(locked), \ DEF_PAGEFLAG_NAME(waiters), \ - DEF_PAGEFLAG_NAME(error), \ DEF_PAGEFLAG_NAME(referenced), \ DEF_PAGEFLAG_NAME(uptodate), \ DEF_PAGEFLAG_NAME(dirty), \ diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index 6f2f2720f3ac..ff8032227876 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -7,7 +7,7 @@ */ #define KPF_LOCKED 0 -#define KPF_ERROR 1 +#define KPF_ERROR 1 /* Now unused */ #define KPF_REFERENCED 2 #define KPF_UPTODATE 3 #define KPF_DIRTY 4 -- cgit v1.2.3 From 310183de7bb2ed114a80d057690ddb23d0cfe814 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:50 +0300 Subject: mm: introduce PageUnaccepted() page type The new page type allows physical memory scanners to detect unaccepted memory and handle it accordingly. The page type is serialized with zone lock. Link: https://lkml.kernel.org/r/20240809114854.3745464-5-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: David Hildenbrand Cc: Borislav Petkov Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a0a29bd092f8..17175a328e6b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -939,6 +939,7 @@ enum pagetype { PG_hugetlb = 0x04000000, PG_slab = 0x02000000, PG_zsmalloc = 0x01000000, + PG_unaccepted = 0x00800000, PAGE_TYPE_BASE = 0x80000000, @@ -1072,6 +1073,13 @@ FOLIO_TEST_FLAG_FALSE(hugetlb) PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) +/* + * Mark pages that has to be accepted before touched for the first time. + * + * Serialized with zone lock. + */ +PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. -- cgit v1.2.3 From 5adfeaecc487e7023f1c7bbdc081707d7a93110f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:51 +0300 Subject: mm: rework accept memory helpers Make accept_memory() and range_contains_unaccepted_memory() take 'start' and 'size' arguments instead of 'start' and 'end'. Remove accept_page(), replacing it with direct calls to accept_memory(). The accept_page() name is going to be used for a different function. Link: https://lkml.kernel.org/r/20240809114854.3745464-6-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. 
Shutemov Suggested-by: David Hildenbrand Cc: Borislav Petkov Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ee8cea73d415..b1eed30fdc06 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4062,18 +4062,18 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, #ifdef CONFIG_UNACCEPTED_MEMORY -bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end); -void accept_memory(phys_addr_t start, phys_addr_t end); +bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size); +void accept_memory(phys_addr_t start, unsigned long size); #else static inline bool range_contains_unaccepted_memory(phys_addr_t start, - phys_addr_t end) + unsigned long size) { return false; } -static inline void accept_memory(phys_addr_t start, phys_addr_t end) +static inline void accept_memory(phys_addr_t start, unsigned long size) { } @@ -4081,9 +4081,7 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) static inline bool pfn_is_unaccepted_memory(unsigned long pfn) { - phys_addr_t paddr = pfn << PAGE_SHIFT; - - return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); + return range_contains_unaccepted_memory(pfn << PAGE_SHIFT, PAGE_SIZE); } void vma_pgtable_walk_begin(struct vm_area_struct *vma); -- cgit v1.2.3 From 1c399e74a97ca733d0780cc51819aa81fb292c22 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 12 Aug 2024 14:12:23 -0400 Subject: mm/x86: implement arch_check_zapped_pud() Introduce arch_check_zapped_pud() to sanity check shadow stack on PUD zaps. It has the same logic as the PMD helper. One thing to mention is, it might be a good idea to use page_table_check in the future for trapping wrong setups of shadow stack pgtable entries [1]. That is left for the future as a separate effort. [1] https://lore.kernel.org/all/59d518698f664e07c036a5098833d7b56b953305.camel@intel.com Link: https://lkml.kernel.org/r/20240812181225.1360970-6-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Cc: "Edgecombe, Rick P" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Aneesh Kumar K.V Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Jiang Cc: David Rientjes Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paolo Bonzini Cc: Rik van Riel Cc: Sean Christopherson Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a6a3cccfc36..780f3b439d98 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -447,6 +447,12 @@ static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, } #endif +#ifndef arch_check_zapped_pud +static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud) +{ +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, -- cgit v1.2.3 From cb0f01beb16669e91510fcdb2cea213931aee017 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 12 Aug 2024 14:12:25 -0400 Subject: mm/mprotect: fix dax pud handlings This is only relevant to the two archs that support PUD dax, aka, x86_64 and ppc64. 
PUD THPs do not yet exist elsewhere, and hugetlb PUDs do not count in this case. DAX have had PUD mappings for years, but change protection path never worked. When the path is triggered in any form (a simple test program would be: call mprotect() on a 1G dev_dax mapping), the kernel will report "bad pud". This patch should fix that. The new change_huge_pud() tries to keep everything simple. For example, it doesn't optimize write bit as that will need even more PUD helpers. It's not too bad anyway to have one more write fault in the worst case once for 1G range; may be a bigger thing for each PAGE_SIZE, though. Neither does it support userfault-wp bits, as there isn't such PUD mappings that is supported; file mappings always need a split there. The same to TLB shootdown: the pmd path (which was for x86 only) has the trick of using _ad() version of pmdp_invalidate*() which can avoid one redundant TLB, but let's also leave that for later. Again, the larger the mapping, the smaller of such effect. There's some difference on handling "retry" for change_huge_pud() (where it can return 0): it isn't like change_huge_pmd(), as the pmd version is safe with all conditions handled in change_pte_range() later, thanks to Hugh's new pte_offset_map_lock(). In short, change_pte_range() is simply smarter. For that, change_pud_range() will need proper retry if it races with something else when a huge PUD changed from under us. The last thing to mention is currently the PUD path ignores the huge pte numa counter (NUMA_HUGE_PTE_UPDATES), not only because DAX is not applicable to NUMA, but also that it's ambiguous on its own to decide how to account pud in this case. In one earlier version of this patchset I proposed to remove the counter as it doesn't even look right to do the accounting as of now [1], but then a further discussion suggests we can leave that for later, as that doesn't block this series if we choose to ignore that counter. That's what this patch does, by ignoring it. When at it, touch up the comment in pgtable_split_needed() to make it generic to either pmd or pud file THPs. [1] https://lore.kernel.org/all/20240715192142.3241557-3-peterx@redhat.com/ [2] https://lore.kernel.org/r/added2d0-b8be-4108-82ca-1367a388d0b1@redhat.com Link: https://lkml.kernel.org/r/20240812181225.1360970-8-peterx@redhat.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Fixes: 27af67f35631 ("powerpc/book3s64/mm: enable transparent pud hugepage") Signed-off-by: Peter Xu Cc: Dan Williams Cc: Matthew Wilcox Cc: Dave Jiang Cc: Hugh Dickins Cc: Kirill A. 
Shutemov Cc: Vlastimil Babka Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Michael Ellerman Cc: Aneesh Kumar K.V Cc: Oscar Salvador Cc: Christophe Leroy Cc: David Hildenbrand Cc: David Rientjes Cc: "Edgecombe, Rick P" Cc: Nicholas Piggin Cc: Paolo Bonzini Cc: Rik van Riel Cc: Sean Christopherson Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ce44caa40eed..6370026689e0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -342,6 +342,17 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags); +#else +static inline int +change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) { return 0; } +#endif + #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ @@ -585,6 +596,19 @@ static inline int next_order(unsigned long *orders, int prev) { return 0; } + +static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, + unsigned long address) +{ +} + +static inline int change_huge_pud(struct mmu_gather *tlb, + struct vm_area_struct *vma, pud_t *pudp, + unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) +{ + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, -- cgit v1.2.3 From b6273b55d88539c6a7127a697c61d3f89c5831fe Mon Sep 17 00:00:00 2001 From: Takaya Saeki Date: Tue, 13 Aug 2024 10:03:12 +0000 Subject: filemap: add trace events for get_pages, map_pages, and fault To allow precise tracking of page caches accessed, add new tracepoints that trigger when a process actually accesses them. The ureadahead program used by ChromeOS traces the disk access of programs as they start up at boot up. It uses mincore(2) or the 'mm_filemap_add_to_page_cache' trace event to accomplish this. It stores this information in a "pack" file and on subsequent boots, it will read the pack file and call readahead(2) on the information so that disk storage can be loaded into RAM before the applications actually need it. A problem we see is that due to the kernel's readahead algorithm that can aggressively pull in more data than needed (to try and accomplish the same goal) and this data is also recorded. The end result is that the pack file contains a lot of pages on disk that are never actually used. Calling readahead(2) on these unused pages can slow down the system boot up times. To solve this, add 3 new trace events, get_pages, map_pages, and fault. These will be used to trace the pages are not only pulled in from disk, but are actually used by the application. Only those pages will be stored in the pack file, and this helps out the performance of boot up. With the combination of these 3 new trace events and mm_filemap_add_to_page_cache, we observed a reduction in the pack file by 7.3% - 20% on ChromeOS varying by device. 
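For reference, a hedged sketch of how the generated tracepoints are emitted on the kernel side (the actual call sites in the filemap code are outside the include/ hunk shown here; the helper below is purely illustrative):

	/* Sketch: the range events take the first and last page cache index accessed. */
	static void note_pagecache_access(struct address_space *mapping,
					  pgoff_t index, pgoff_t last_index)
	{
		trace_mm_filemap_get_pages(mapping, index, last_index);	/* read path */
		trace_mm_filemap_map_pages(mapping, index, last_index);	/* faultaround */
		trace_mm_filemap_fault(mapping, index);			/* single fault */
	}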
Link: https://lkml.kernel.org/r/20240813100312.3930505-1-takayas@chromium.org Signed-off-by: Takaya Saeki Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: Junichi Uekawa Cc: Mathieu Desnoyers Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/trace/events/filemap.h | 84 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include') diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 46c89c1e460c..f48fe637bfd2 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -56,6 +56,90 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_ARGS(folio) ); +DECLARE_EVENT_CLASS(mm_filemap_op_page_cache_range, + + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + + TP_ARGS(mapping, index, last_index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + __field(unsigned long, last_index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + __entry->last_index = last_index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld-%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT, + ((((loff_t)__entry->last_index + 1) << PAGE_SHIFT) - 1) + ) +); + +DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_get_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + +DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_map_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + +TRACE_EVENT(mm_filemap_fault, + TP_PROTO(struct address_space *mapping, pgoff_t index), + + TP_ARGS(mapping, index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT + ) +); + TRACE_EVENT(filemap_set_wb_err, TP_PROTO(struct address_space *mapping, errseq_t eseq), -- cgit v1.2.3 From 223febc6e5573cda5e94e6b38fcdaded068db777 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 18:26:02 +1000 Subject: mm: add optional close() to struct vm_special_mapping Add an optional close() callback to struct vm_special_mapping. It will be used, by powerpc at least, to handle unmapping of the VDSO. Although support for unmapping the VDSO was initially added for CRIU[1], it is not desirable to guard that support behind CONFIG_CHECKPOINT_RESTORE. There are other known users of unmapping the VDSO which are not related to CRIU, eg. Valgrind [2] and void-ship [3]. The powerpc arch_unmap() hook has been in place for ~9 years, with no ifdef, so there may be other unknown users that have come to rely on unmapping the VDSO. Even if the code was behind an ifdef, major distros enable CHECKPOINT_RESTORE so users may not realise unmapping the VDSO depends on that configuration option. 
It's also undesirable to have such core mm behaviour behind a relatively obscure CONFIG option. Longer term the unmap behaviour should be standardised across architectures, however that is complicated by the fact that the VDSO pointer is stored differently across architectures. There was a previous attempt to unify that handling [4], which could be revived. See [5] for further discussion. [1]: commit 83d3f0e90c6c ("powerpc/mm: tracking vDSO remap") [2]: https://sourceware.org/git/?p=valgrind.git;a=commit;h=3a004915a2cbdcdebafc1612427576bf3321eef5 [3]: https://github.com/insanitybit/void-ship [4]: https://lore.kernel.org/lkml/20210611180242.711399-17-dima@arista.com/ [5]: https://lore.kernel.org/linuxppc-dev/shiq5v3jrmyi6ncwke7wgl76ojysgbhrchsk32q4lbx2hadqqc@kzyy2igem256 Link: https://lkml.kernel.org/r/20240812082605.743814-1-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Suggested-by: Linus Torvalds Reviewed-by: David Hildenbrand Reviewed-by: Liam R. Howlett Cc: Christophe Leroy Cc: Jeff Xu Cc: Nicholas Piggin Cc: Pedro Falcato Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 003619fab20e..2419e60c9a7f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1324,6 +1324,9 @@ struct vm_special_mapping { int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); + + void (*close)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma); }; enum tlb_flush_reason { -- cgit v1.2.3 From 40b88644dd92d99e572063893c2a247598c238d6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 18:26:04 +1000 Subject: mm: remove arch_unmap() Now that powerpc no longer uses arch_unmap() to handle VDSO unmapping, there are no meaningful implementations left. Drop support for it entirely, and update comments which refer to it. Link: https://lkml.kernel.org/r/20240812082605.743814-3-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Suggested-by: Linus Torvalds Acked-by: David Hildenbrand Reviewed-by: Thomas Gleixner Reviewed-by: Liam R. Howlett Cc: Christophe Leroy Cc: Jeff Xu Cc: Nicholas Piggin Cc: Pedro Falcato Signed-off-by: Andrew Morton --- include/asm-generic/mm_hooks.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index 4dbb177d1150..6eea3b3c1e65 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap - * and arch_unmap to be included in asm-FOO/mmu_context.h for any - * arch FOO which doesn't need to hook these. + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap + * to be included in asm-FOO/mmu_context.h for any arch FOO which + * doesn't need to hook these.
*/ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H @@ -17,11 +17,6 @@ static inline void arch_exit_mmap(struct mm_struct *mm) { } -static inline void arch_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { -- cgit v1.2.3 From 497258dfafcc6b88e7c4c46a38a479721d44cf41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Aug 2024 15:14:47 -0700 Subject: mm: remove legacy install_special_mapping() code All relevant architectures had already been converted to the new interface (which just has an underscore in front of the name - not very imaginative naming), this just force-converts the stragglers. The modern interface is almost identical to the old one, except instead of the page pointer it takes a "struct vm_special_mapping" that describes the mapping (and contains the page pointer as one member), and it returns the resulting 'vma' instead of just the error code. Getting rid of the old interface also gets rid of some special casing, which had caused problems with the mremap extensions to "struct vm_special_mapping". [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/CAHk-=whvR+z=0=0gzgdfUiK70JTa-=+9vxD-4T=3BagXR6dciA@mail.gmail.com Tested-by: Rob Landley # arch/sh/ Link: https://lore.kernel.org/all/20240819195120.GA1113263@thelio-3990X/ Signed-off-by: Linus Torvalds Cc: Nathan Chancellor Cc: Michael Ellerman Cc: Anton Ivanov Cc: Brian Cain Cc: Christophe Leroy Cc: Dinh Nguyen Cc: Guo Ren Cc: Jeff Xu Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Liam R. Howlett Cc: Nicholas Piggin Cc: Pedro Falcato Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Landley Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b1eed30fdc06..0fde51c0532f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3263,10 +3263,6 @@ extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, const struct vm_special_mapping *spec); -/* This is an obsolete alternative to _install_special_mapping. */ -extern int install_special_mapping(struct mm_struct *mm, - unsigned long addr, unsigned long len, - unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); unsigned long randomize_page(unsigned long start, unsigned long range); -- cgit v1.2.3 From 02f4bbefcada38cab86b365e5c795e0f858356bc Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 14 Aug 2024 17:34:15 +0800 Subject: mm: kmem: add lockdep assertion to obj_cgroup_memcg obj_cgroup_memcg() is only safe to use - i.e. the returned memory cgroup is only protected from being freed - when the caller is holding the rcu read lock, objcg_lock or cgroup_mutex. It is very easy to ignore those conditions when users call some upper APIs which call obj_cgroup_memcg() internally like mem_cgroup_from_slab_obj() (See the link below). So it is better to add a lockdep assertion to obj_cgroup_memcg() to find those issues ASAP. Because there is no user of obj_cgroup_memcg() holding objcg_lock to make the returned memory cgroup safe, do not add objcg_lock assertion (We should export objcg_lock if we really want to do so). Additionally, this is some internal implementation detail of memcg and should not be accessible outside memcg code.
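To make the locking rule concrete, a minimal sketch of a caller that satisfies the new assertion (the function is hypothetical; obj_cgroup_memcg() and the RCU primitives are real):

	/* Sketch: only dereference the objcg's memcg inside an RCU read section. */
	static void peek_objcg_memcg(struct obj_cgroup *objcg)
	{
		struct mem_cgroup *memcg;

		rcu_read_lock();	/* keeps the returned memcg from being freed */
		memcg = obj_cgroup_memcg(objcg);
		/* ... use memcg here; do not stash the pointer past the unlock ... */
		rcu_read_unlock();
	}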
Some users like __mem_cgroup_uncharge() do not care the lifetime of the returned memory cgroup, which just want to know if the folio is charged to a memory cgroup, therefore, they do not need to hold the needed locks. In which case, introduce a new helper folio_memcg_charged() to do this. Compare it to folio_memcg(), it could eliminate a memory access of objcg->memcg for kmem, actually, a really small gain. [songmuchun@bytedance.com: fix split_page_memcg()] Link: https://lkml.kernel.org/r/20240819080415.44964-1-songmuchun@bytedance.com Link: https://lore.kernel.org/all/20240718083607.42068-1-songmuchun@bytedance.com/ Link: https://lkml.kernel.org/r/20240814093415.17634-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1b79760af685..07eadf7ecbba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -366,11 +366,11 @@ static inline bool folio_memcg_kmem(struct folio *folio); * After the initialization objcg->memcg is always pointing at * a valid memcg, but can be atomically swapped to the parent memcg. * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. + * The caller must ensure that the returned memcg won't be released. */ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) { + lockdep_assert_once(rcu_read_lock_held() || lockdep_is_held(&cgroup_mutex)); return READ_ONCE(objcg->memcg); } @@ -444,6 +444,19 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return __folio_memcg(folio); } +/* + * folio_memcg_charged - If a folio is charged to a memory cgroup. + * @folio: Pointer to the folio. + * + * Returns true if folio is charged to a memory cgroup, otherwise returns false. + */ +static inline bool folio_memcg_charged(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return __folio_objcg(folio) != NULL; + return __folio_memcg(folio) != NULL; +} + /** * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. * @folio: Pointer to the folio. @@ -460,7 +473,6 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) unsigned long memcg_data = READ_ONCE(folio->memcg_data); VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; @@ -469,6 +481,8 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) return obj_cgroup_memcg(objcg); } + WARN_ON_ONCE(!rcu_read_lock_held()); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } -- cgit v1.2.3 From bd164d81a767f33c30d5c5b50b775631699ee976 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 14 Aug 2024 12:19:28 -0400 Subject: maple_tree: introduce store_type enum Patch series "Introduce a store type enum for the Maple tree", v4. ================================ OVERVIEW ================================ This series implements two work items[3]: "aligning mas_store_gfp() with mas_preallocate()" and "enum for store type". mas_store_gfp() is modified to preallocate nodes. 
This simplies many of the write helper functions by allowing them to use mas_store_gfp() rather than open coding node allocation and error handling. The enum defines the following store types: enum store_type { wr_invalid, wr_new_root, wr_store_root, wr_exact_fit, wr_spanning_store, wr_split_store, wr_rebalance, wr_append, wr_node_store, wr_slot_store, }; In the current maple tree code, a walk down the tree is done in mas_preallocate() to determine the number of nodes needed for this write. After node allocation, mas_wr_store_entry() will perform another walk to determine which write helper function to use to complete the write. Rather than performing the second walk, we can store the type of write in the maple write state during node allocation and read this field to complete the write. Patches 1-16 implement this store type feature. Patch 17 is a cleanup patch to change functions that have unused return types to be void. ================================ RESULTS ================================= Phoronix t-test-1 (Seconds < Lower Is Better) v6.10-rc6 Threads: 1 33.15 Threads: 2 10.81 v6.10-rc6 + this series Threads: 1 32.69 Threads: 2 10.45 Stress-ng mmap 6.10_base store_type_v4 Duration User 2744.65 2769.40 Duration System 10862.69 10817.59 Duration Elapsed 1477.58 1478.35 ================================ TESTING ================================= Testing was done with the maple tree test suite. A new test case is also added to validate the order in which we test for and assign the store type. [1]: https://lore.kernel.org/linux-mm/80926b22-a8d2-9992-eb5e-27e2c99cf460@google.com/T/#m81044feb66765265f8ca7f21e4b4b3725b18780a [2]: https://lore.kernel.org/linux-mm/80926b22-a8d2-9992-eb5e-27e2c99cf460@google.com/T/#mb36c6526486638e82518c0f37a428fb279c84d8a [3]: https://lists.infradead.org/pipermail/maple-tree/2023-December/003098.html This patch (of 17): Add a store_type enum that is stored in ma_state. This will be used to keep track of partial walks of the tree so that subsequent walks can pick up where a previous walk left off. Link: https://lkml.kernel.org/r/20240814161944.55347-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20240814161944.55347-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Liam R. 
Howlett Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a53ad4dabd7e..8e1504a81cd2 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -148,6 +148,18 @@ enum maple_type { maple_arange_64, }; +enum store_type { + wr_invalid, + wr_new_root, + wr_store_root, + wr_exact_fit, + wr_spanning_store, + wr_split_store, + wr_rebalance, + wr_append, + wr_node_store, + wr_slot_store, +}; /** * DOC: Maple tree flags @@ -436,6 +448,7 @@ struct ma_state { unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ + enum store_type store_type; /* The type of store needed for this operation */ }; struct ma_wr_state { @@ -477,6 +490,7 @@ struct ma_wr_state { .max = ULONG_MAX, \ .alloc = NULL, \ .mas_flags = 0, \ + .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ -- cgit v1.2.3 From 5d383b69a04e2eb2e0ae06e91821fd82cb9acf73 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 Aug 2024 22:04:47 -0700 Subject: memcg: move v1 only percpu stats in separate struct Patch series "memcg: further decouple v1 code from v2". Some of the v1 code is still in v2 code base due to v1 fields in the struct memcg_vmstats_percpu. This field decouples those fileds from v2 struct and move all the related code into v1 only code base. This patch (of 7): At the moment struct memcg_vmstats_percpu contains two v1 only fields which consumes memory even when CONFIG_MEMCG_V1 is not enabled. In addition there are v1 only functions accessing them and are in the main memcontrol source file and can not be moved to v1 only source file due to these fields. Let's move these fields into their own struct. Later patches will move the functions accessing them to v1 source file and only allocate these fields when CONFIG_MEMCG_V1 is enabled. Link: https://lkml.kernel.org/r/20240815050453.1298138-1-shakeel.butt@linux.dev Link: https://lkml.kernel.org/r/20240815050453.1298138-2-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: T.J. Mercier Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 07eadf7ecbba..08b7121cf43a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -70,6 +70,7 @@ struct mem_cgroup_id { }; struct memcg_vmstats_percpu; +struct memcg1_events_percpu; struct memcg_vmstats; struct lruvec_stats_percpu; struct lruvec_stats; @@ -254,6 +255,7 @@ struct mem_cgroup { struct list_head objcg_list; struct memcg_vmstats_percpu __percpu *vmstats_percpu; + struct memcg1_events_percpu __percpu *events_percpu; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; -- cgit v1.2.3 From 0ccaf421d6592bb99bb9b424e4ccca3c6367d799 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 Aug 2024 22:04:52 -0700 Subject: memcg: allocate v1 event percpu only on v1 deployment Currently memcg->events_percpu gets allocated on v2 deployments. Let's move the allocation to v1 only codebase. This is not needed in v2. Link: https://lkml.kernel.org/r/20240815050453.1298138-7-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: T.J. 
Mercier Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 08b7121cf43a..ed170399179a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -255,7 +255,6 @@ struct mem_cgroup { struct list_head objcg_list; struct memcg_vmstats_percpu __percpu *vmstats_percpu; - struct memcg1_events_percpu __percpu *events_percpu; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; @@ -277,6 +276,8 @@ struct mem_cgroup { struct page_counter kmem; /* v1 only */ struct page_counter tcpmem; /* v1 only */ + struct memcg1_events_percpu __percpu *events_percpu; + unsigned long soft_limit; /* protected by memcg_oom_lock */ -- cgit v1.2.3 From 836d13a6ef8a2eb0eab2bd2de06f2deabc62b060 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:18 +0300 Subject: xz: switch from public domain to BSD Zero Clause License (0BSD) Remove the public domain notices and add SPDX license identifiers. Change MODULE_LICENSE from "GPL" to "Dual BSD/GPL" because 0BSD should count as a BSD license variant here. The switch to 0BSD was done in the upstream XZ Embedded project because public domain has (real or perceived) legal issues in some jurisdictions. Link: https://lkml.kernel.org/r/20240721133633.47721-4-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/decompress/unxz.h | 5 ++--- include/linux/xz.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index f764e2a7201e..3dd2658a9dab 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef DECOMPRESS_UNXZ_H diff --git a/include/linux/xz.h b/include/linux/xz.h index 7285ca5d56e9..5728d57aecc0 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * XZ decompressor * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_H -- cgit v1.2.3 From ad8c67b870d108aa1286f4fc76d0c29a736fd75e Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:20 +0300 Subject: xz: fix kernel-doc formatting errors in xz.h The opaque structs xz_dec and xz_dec_microlzma are declared in xz.h but their definitions are in xz_dec_lzma2.c without kernel-doc comments. Use regular comments for these structs in xz.h to avoid errors when building the docs. Add a few missing colons. 
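(Background for the two fixes: a comment opening with "/**" is parsed by scripts/kernel-doc, which then expects a definition it can attach member documentation to, so declaration-only types such as struct xz_dec get a plain "/*" comment instead; and parameter lines are only picked up in the "@name: description" form, for example:)

         * @mode:      XZ_SINGLE or XZ_PREALLOC
         * @dict_size: LZMA dictionary size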
Link: https://lkml.kernel.org/r/20240721133633.47721-6-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index 5728d57aecc0..af1e075d9add 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -142,7 +142,7 @@ struct xz_buf { size_t out_size; }; -/** +/* * struct xz_dec - Opaque type to hold the XZ decoder state */ struct xz_dec; @@ -240,15 +240,16 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * marked with XZ_EXTERN. This avoids warnings about static functions that * are never defined. */ -/** + +/* * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state */ struct xz_dec_microlzma; /** * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder - * @mode XZ_SINGLE or XZ_PREALLOC - * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * @mode: XZ_SINGLE or XZ_PREALLOC + * @dict_size: LZMA dictionary size. This must be at least 4 KiB and * at most 3 GiB. * * In contrast to xz_dec_init(), this function only allocates the memory @@ -276,15 +277,15 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state - * @s Decoder state allocated using xz_dec_microlzma_alloc() - * @comp_size Compressed size of the input stream - * @uncomp_size Uncompressed size of the input stream. A value smaller + * @s: Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size: Compressed size of the input stream + * @uncomp_size: Uncompressed size of the input stream. A value smaller * than the real uncompressed size of the input stream can * be specified if uncomp_size_is_exact is set to false. * uncomp_size can never be set to a value larger than the * expected real uncompressed size because it would eventually * result in XZ_DATA_ERROR. - * @uncomp_size_is_exact This is an int instead of bool to avoid + * @uncomp_size_is_exact: This is an int instead of bool to avoid * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ @@ -294,7 +295,7 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder - * @s Decoder state initialized using xz_dec_microlzma_reset() + * @s: Decoder state initialized using xz_dec_microlzma_reset() * @b: Input and output buffers * * This works similarly to xz_dec_run() with a few important differences. -- cgit v1.2.3 From 0f2c5996340b69d167d3f6ca38d3012204787ac1 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:21 +0300 Subject: xz: improve the MicroLZMA kernel-doc in xz.h Move the description of the format into a "DOC:" comment. Emphasize that MicroLZMA functions aren't usually needed. 
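(As a side note on the example retained in the moved text: the raw LZMA properties byte is (pb * 5 + lp) * 9 + lc, so lc/lp/pb = 3/0/2 encodes to (2 * 5 + 0) * 9 + 3 = 93 = 0x5D, whose bitwise negation is 0xA2, hence the value quoted in the comment.)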
Link: https://lkml.kernel.org/r/20240721133633.47721-7-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index af1e075d9add..701d62c02b9a 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -232,9 +232,18 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); */ XZ_EXTERN void xz_dec_end(struct xz_dec *s); -/* - * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. - * See xz_dec_microlzma_alloc() below for details. +/** + * DOC: MicroLZMA decompressor + * + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. **In most cases one needs the XZ APIs above instead.** + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. * * These functions aren't used or available in preboot code and thus aren't * marked with XZ_EXTERN. This avoids warnings about static functions that @@ -262,15 +271,6 @@ struct xz_dec_microlzma; * On success, xz_dec_microlzma_alloc() returns a pointer to * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. - * - * The compressed format supported by this decoder is a raw LZMA stream - * whose first byte (always 0x00) has been replaced with bitwise-negation - * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is - * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. - * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream - * marker must not be used. The unused values are reserved for future use. - * This MicroLZMA header format was created for use in EROFS but may be used - * by others too. */ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, uint32_t dict_size); -- cgit v1.2.3 From c6f371bab25edccd39caa5dd452b50d9dfdf4ff0 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Wed, 24 Jul 2024 14:05:41 +0300 Subject: xz: remove XZ_EXTERN and extern from functions XZ_EXTERN was used to make internal functions static in the preboot code. However, in other decompressors this hasn't been done. On x86-64, this makes no difference to the kernel image size. Omit XZ_EXTERN and let some of the internal functions be extern in the preboot code. Omitting XZ_EXTERN from include/linux/xz.h fixes warnings in "make htmldocs" and makes the intradocument links to xz_dec functions work in Documentation/staging/xz.rst. The alternative would have been to add "XZ_EXTERN" to c_id_attributes in Documentation/conf.py but omitting XZ_EXTERN seemed cleaner. 
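(For readers following the now-working cross-references, a minimal single-call usage sketch of this API; the buffer variables in_buf/in_len/out_buf/out_len are placeholders:)

        struct xz_buf b = {
                .in = in_buf,   .in_pos = 0,  .in_size = in_len,
                .out = out_buf, .out_pos = 0, .out_size = out_len,
        };
        struct xz_dec *s = xz_dec_init(XZ_SINGLE, 0); /* dict_max unused in single-call mode */
        enum xz_ret ret;

        if (!s)
                return -ENOMEM;
        ret = xz_dec_run(s, &b);        /* XZ_STREAM_END on success in single-call mode */
        xz_dec_end(s);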
Link: https://lore.kernel.org/lkml/20240723205437.3c0664b0@kaneli/ Link: https://lkml.kernel.org/r/20240724110544.16430-1-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Tested-by: Michael Ellerman (powerpc) Cc: Jonathan Corbet Cc: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/xz.h b/include/linux/xz.h index 701d62c02b9a..58ae1d746c6f 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -18,11 +18,6 @@ # include #endif -/* In Linux, this is used to make extern functions static when needed. */ -#ifndef XZ_EXTERN -# define XZ_EXTERN extern -#endif - /** * enum xz_mode - Operation mode * @@ -190,7 +185,7 @@ struct xz_dec; * ready to be used with xz_dec_run(). If memory allocation fails, * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder @@ -210,7 +205,7 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. */ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); /** * xz_dec_reset() - Reset an already allocated decoder state @@ -223,14 +218,14 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in * multi-call mode. */ -XZ_EXTERN void xz_dec_reset(struct xz_dec *s); +void xz_dec_reset(struct xz_dec *s); /** * xz_dec_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_init(). If s is NULL, * this function does nothing. */ -XZ_EXTERN void xz_dec_end(struct xz_dec *s); +void xz_dec_end(struct xz_dec *s); /** * DOC: MicroLZMA decompressor @@ -244,10 +239,6 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream * marker must not be used. The unused values are reserved for future use. - * - * These functions aren't used or available in preboot code and thus aren't - * marked with XZ_EXTERN. This avoids warnings about static functions that - * are never defined. */ /* @@ -272,8 +263,8 @@ struct xz_dec_microlzma; * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. */ -extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size); +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state @@ -289,9 +280,8 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. 
*/ -extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, - uint32_t comp_size, uint32_t uncomp_size, - int uncomp_size_is_exact); +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact); /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder @@ -329,15 +319,14 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, * may be changed normally like with XZ_PREALLOC. This way input data can be * provided from non-contiguous memory. */ -extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, - struct xz_buf *b); +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, struct xz_buf *b); /** * xz_dec_microlzma_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_microlzma_alloc(). * If s is NULL, this function does nothing. */ -extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); +void xz_dec_microlzma_end(struct xz_dec_microlzma *s); /* * Standalone build (userspace build or in-kernel build for boot time use) @@ -358,13 +347,13 @@ extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); * This must be called before any other xz_* function to initialize * the CRC32 lookup table. */ -XZ_EXTERN void xz_crc32_init(void); +void xz_crc32_init(void); /* * Update CRC32 value using the polynomial from IEEE-802.3. To start a new * calculation, the third argument must be zero. To continue the calculation, * the previously returned value is passed as the third argument. */ -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); #endif #endif -- cgit v1.2.3 From 9a42bfd255b288dad2d1a9df0a1fe58394d5da12 Mon Sep 17 00:00:00 2001 From: Deshan Zhang Date: Thu, 25 Jul 2024 17:30:45 +0800 Subject: lib/lru_cache: fix spelling mistake "colision"->"collision" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake in a literal string and in cariable names. Fix these. Link: https://lkml.kernel.org/r/20240725093044.1742842-1-deshan@nfschina.com Signed-off-by: Deshan Zhang Cc: Christoph Böhmwalder Cc: Lars Ellenberg Cc: Philipp Reisner Signed-off-by: Andrew Morton --- include/linux/lru_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index c9afcdd9324c..ff82ef85a084 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -119,7 +119,7 @@ write intent log information, three of which are mentioned here. */ /* this defines an element in a tracked set - * .colision is for hash table lookup. + * .collision is for hash table lookup. * When we process a new IO request, we know its sector, thus can deduce the * region number (label) easily. To do the label -> object lookup without a * full list walk, we use a simple hash table. @@ -145,7 +145,7 @@ write intent log information, three of which are mentioned here. * But it avoids high order page allocations in kmalloc. 
*/ struct lc_element { - struct hlist_node colision; + struct hlist_node collision; struct list_head list; /* LRU list or free list */ unsigned refcnt; /* back "pointer" into lc_cache->element[index], -- cgit v1.2.3 From 6ce2082fd3a25d5a8c756120959237cace0379f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 15:12:35 +0300 Subject: fault-inject: improve build for CONFIG_FAULT_INJECTION=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fault-inject.h users across the kernel need to add a lot of #ifdef CONFIG_FAULT_INJECTION to cater for shortcomings in the header. Make fault-inject.h self-contained for CONFIG_FAULT_INJECTION=n, and add stubs for DECLARE_FAULT_ATTR(), setup_fault_attr(), should_fail_ex(), and should_fail() to allow removal of conditional compilation. [akpm@linux-foundation.org: repair fallout from no longer including debugfs.h into fault-inject.h] [akpm@linux-foundation.org: fix drivers/misc/xilinx_tmr_inject.c] [akpm@linux-foundation.org: Add debugfs.h inclusion to more files, per Stephen] Link: https://lkml.kernel.org/r/20240813121237.2382534-1-jani.nikula@intel.com Fixes: 6ff1cb355e62 ("[PATCH] fault-injection capabilities infrastructure") Signed-off-by: Jani Nikula Cc: Akinobu Mita Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Rob Clark Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/fault-inject.h | 36 +++++++++++++++++++++++++++++------- include/linux/mmc/host.h | 1 + include/ufs/ufshcd.h | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 354413950d34..8c829d28dcf3 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -2,13 +2,17 @@ #ifndef _LINUX_FAULT_INJECT_H #define _LINUX_FAULT_INJECT_H +#include +#include + +struct dentry; +struct kmem_cache; + #ifdef CONFIG_FAULT_INJECTION -#include -#include +#include #include #include -#include /* * For explanation of the elements of this struct, see @@ -51,6 +55,28 @@ int setup_fault_attr(struct fault_attr *attr, char *str); bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); +#else /* CONFIG_FAULT_INJECTION */ + +struct fault_attr { +}; + +#define DECLARE_FAULT_ATTR(name) struct fault_attr name = {} + +static inline int setup_fault_attr(struct fault_attr *attr, char *str) +{ + return 0; /* Note: 0 means error for __setup() handlers! 
*/ +} +static inline bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) +{ + return false; +} +static inline bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return false; +} + +#endif /* CONFIG_FAULT_INJECTION */ + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *fault_create_debugfs_attr(const char *name, @@ -87,10 +113,6 @@ static inline void fault_config_init(struct fault_config *config, #endif /* CONFIG_FAULT_INJECTION_CONFIGFS */ -#endif /* CONFIG_FAULT_INJECTION */ - -struct kmem_cache; - #ifdef CONFIG_FAIL_PAGE_ALLOC bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); #else diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..49470188fca7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 0fd2aebac728..3f68ae3e4330 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From d994c238347d7ba4de15da00985e1bea75e91dc7 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 17 Aug 2024 14:37:54 +0200 Subject: ratelimit: convert flags to int to save 8 bytes in size Only bit 1 is used, making an unsigned long a total overkill. This brings it from 40 to 32 bytes, which in turn shrinks user_struct from 136 to 128 bytes. Since the latter is allocated with hwalign, this means the total usage goes down from 192 to 128 bytes per object. No functional changes. Link: https://lkml.kernel.org/r/20240817123754.240924-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/ratelimit_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 002266693e50..765232ce0b5e 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -19,8 +19,8 @@ struct ratelimit_state { int burst; int printed; int missed; + unsigned int flags; unsigned long begin; - unsigned long flags; }; #define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ -- cgit v1.2.3 From 32cebfe1cc21a84e4a907575bb9fd1c3f6b091fd Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:15 +0800 Subject: lib/string_choices: add str_true_false()/str_false_true() helper Add str_true_false()/str_false_true() helper to retur a "true" or "false" string literal. We found more than 10 cases currently exist in the tree. So these helpers can be used for these cases. This patch (of 3): Add str_true_false()/str_false_true() helper to return "true" or "false" string literal. Link: https://lkml.kernel.org/r/20240827024517.914100-1-lihongbo22@huawei.com Link: https://lkml.kernel.org/r/20240827024517.914100-2-lihongbo22@huawei.com Signed-off-by: Hongbo Li Cc: Andy Shevchenko Cc: Anna Schumaker Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Matthew Wilcox Cc: Trond Myklebust Signed-off-by: Andrew Morton --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..4a2432313b8e 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,6 +42,12 @@ static inline const char *str_yes_no(bool v) return v ? 
"yes" : "no"; } +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From 181028a0d84cdcc7ac86d05cc49eaa416ce85c8b Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Thu, 29 Aug 2024 08:34:05 -0700 Subject: RDMA/bnxt_re: Share a page to expose per SRQ info with userspace Gen P7 adapters needs to share a toggle bits information received in kernel driver with the user space. User space needs this info to arm the SRQ. User space application can get this page using the UAPI routines. Library will mmap this page and get the toggle bits to be used in the next ARM Doorbell. Uses a hash list to map the SRQ structure from the SRQ ID. SRQ structure is retrieved from the hash list while the library calls the UAPI routine to get the toggle page mapping. Currently the full page is mapped per SRQ. This can be optimized to enable multiple SRQs from the same application share the same page and different offsets in the page Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1724945645-14989-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 6821002931c8..faa9d62b3b30 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -141,8 +141,14 @@ struct bnxt_re_srq_req { __aligned_u64 srq_handle; }; +enum bnxt_re_srq_mask { + BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT = 0x1, +}; + struct bnxt_re_srq_resp { __u32 srqid; + __u32 rsvd; /* padding */ + __aligned_u64 comp_mask; }; enum bnxt_re_shpg_offt { -- cgit v1.2.3 From c4a315eaf8eff0d3234600e13db7e7c71c0b3405 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Aug 2024 14:14:56 +0200 Subject: gpio: ath79: remove support for platform data There are no more board files defining platform data for this driver so remove the header and support from the driver. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240821121456.19553-4-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-ath79.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/platform_data/gpio-ath79.h (limited to 'include') diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h deleted file mode 100644 index 3ea6dd942c27..000000000000 --- a/include/linux/platform_data/gpio-ath79.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Atheros AR7XXX/AR9XXX GPIO controller platform data - * - * Copyright (C) 2015 Alban Bedel - */ - -#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H -#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H - -struct ath79_gpio_platform_data { - unsigned ngpios; - bool oe_inverted; -}; - -#endif -- cgit v1.2.3 From afec1aba08607b18cfd00fdcd6525aeca0e187bf Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 29 Jul 2024 21:26:43 +0100 Subject: dt-bindings: clock: renesas: Document RZ/V2H(P) SoC CPG Document the device tree bindings for the Renesas RZ/V2H(P) SoC Clock Pulse Generator (CPG). 
CPG block handles the below operations: - Generation and control of clock signals for the IP modules - Generation and control of resets - Control over booting - Low power consumption and power supply domains Also define constants for the core clocks of the RZ/V2H(P) SoC. Note the core clocks are a subset of the ones which are listed as part of section 4.4.2 of HW manual Rev.1.01 which cannot be controlled by CLKON register. Signed-off-by: Lad Prabhakar Reviewed-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20240729202645.263525-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g057-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h new file mode 100644 index 000000000000..541e6d719bd6 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G057_SYS_0_PCLK 0 +#define R9A09G057_CA55_0_CORE_CLK0 1 +#define R9A09G057_CA55_0_CORE_CLK1 2 +#define R9A09G057_CA55_0_CORE_CLK2 3 +#define R9A09G057_CA55_0_CORE_CLK3 4 +#define R9A09G057_CA55_0_PERIPHCLK 5 +#define R9A09G057_CM33_CLK0 6 +#define R9A09G057_CST_0_SWCLKTCK 7 +#define R9A09G057_IOTOP_0_SHCLK 8 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 4b2b0a2ce8153d65d0829e45e73bf6acdc291344 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Aug 2024 17:23:57 +0300 Subject: gpiolib: legacy: Kill GPIOF_INIT_* definitions Besides the fact that (old) drivers use wrong definitions, e.g., GPIOF_INIT_HIGH instead of GPIOF_OUT_INIT_HIGH, shrink the legacy definitions by killing those GPIOF_INIT_* completely. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240828142554.2424189-2-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 063f71b18a7c..4af8ad114557 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -20,12 +20,9 @@ struct device; #define GPIOF_DIR_OUT (0 << 0) #define GPIOF_DIR_IN (1 << 0) -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - #define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | (0 << 1)) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | (1 << 1)) /* Gpio pin is active-low */ #define GPIOF_ACTIVE_LOW (1 << 2) -- cgit v1.2.3 From 8c045ca534d03bab1ce4b4de49d29a36276a1e35 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Aug 2024 17:23:58 +0300 Subject: gpiolib: legacy: Kill GPIOF_DIR_* definitions Besides the fact that (old) drivers use wrong definitions, e.g., GPIOF_DIR_IN instead of GPIOF_IN, shrink the legacy definitions by killing those GPIOF_DIR_* completely. 
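To illustrate what remains for legacy users after the cleanup, a hedged consumer sketch (GPIO number and label are made up; new code should use the descriptor-based gpiod_* API instead):

        #include <linux/gpio.h>

        static int example_init(void)
        {
                int err;

                /* output, driven high at request time; no GPIOF_DIR_* needed */
                err = gpio_request_one(42, GPIOF_OUT_INIT_HIGH, "example-reset");
                if (err)
                        return err;

                gpio_set_value(42, 0);  /* later: drive it low */
                gpio_free(42);
                return 0;
        }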
Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240828142554.2424189-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 4af8ad114557..2d105be7bbc3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -17,12 +17,9 @@ struct device; /* make these flag values available regardless of GPIO kconfig options */ -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | (0 << 1)) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | (1 << 1)) +#define GPIOF_IN ((1 << 0)) +#define GPIOF_OUT_INIT_LOW ((0 << 0) | (0 << 1)) +#define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) /* Gpio pin is active-low */ #define GPIOF_ACTIVE_LOW (1 << 2) -- cgit v1.2.3 From 60949b7b805424f21326b450ca4f1806c06d982e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 26 Aug 2024 12:16:44 +0200 Subject: ACPI: CPPC: Fix MASK_VAL() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MASK_VAL() was added as a way to handle bit_offset and bit_width for registers located in system memory address space. However, while suited for reading, it does not work for writing and result in corrupted registers when writing values with bit_offset > 0. Moreover, when a register is collocated with another one at the same address but with a different mask, the current code results in the other registers being overwritten with 0s. The write procedure for SYSTEM_MEMORY registers should actually read the value, mask it, update it and write it with the updated value. Moreover, since registers can be located in the same word, we must take care of locking the access before doing it. We should potentially use a global lock since we don't know in if register addresses aren't shared with another _CPC package but better not encourage vendors to do so. Assume that registers can use the same word inside a _CPC package and thus, use a per _CPC package lock. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Clément Léger Link: https://patch.msgid.link/20240826101648.95654-1-cleger@rivosinc.com [ rjw: Dropped redundant semicolon ] Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 930b6afba6f4..e1720d930666 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -64,6 +64,8 @@ struct cpc_desc { int cpu_id; int write_cmd_status; int write_cmd_id; + /* Lock used for RMW operations in cpc_write() */ + spinlock_t rmw_lock; struct cpc_register_resource cpc_regs[MAX_CPC_REG_ENT]; struct acpi_psd_package domain_info; struct kobject kobj; -- cgit v1.2.3 From e220917fa50774fedb27c075df2261fd664e8ca3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 22 Aug 2024 15:50:12 +0200 Subject: mm: split a folio in minimum folio order chunks split_folio() and split_folio_to_list() assume order 0, to support minorder for non-anonymous folios, we must expand these to check the folio mapping order and use that. 
Set new_order to be at least minimum folio order if it is set in split_huge_page_to_list() so that we can maintain minimum folio order requirement in the page cache. Update the debugfs write files used for testing to ensure the order is respected as well. We simply enforce the min order when a file mapping is used. Signed-off-by: Luis Chamberlain Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240902124931.506061-2-kernel@pankajraghav.com # folded fix Link: https://lore.kernel.org/r/20240822135018.1931258-5-kernel@pankajraghav.com Tested-by: David Howells Reviewed-by: Hannes Reinecke Reviewed-by: Zi Yan Signed-off-by: Christian Brauner --- include/linux/huge_mm.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e25d9ebfdf89..7a570e0437c9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -96,6 +96,8 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define split_folio(f) split_folio_to_list(f, NULL) + #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PUD_SHIFT PUD_SHIFT @@ -317,9 +319,24 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); +int min_order_for_split(struct folio *folio); +int split_folio_to_list(struct folio *folio, struct list_head *list); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list_to_order(page, NULL, 0); + struct folio *folio = page_folio(page); + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + /* + * split_huge_page() locks the page before splitting and + * expects the same page that has been split to be locked when + * returned. split_folio(page_folio(page)) cannot be used here + * because it converts the page to folio and passes the head + * page to be split. + */ + return split_huge_page_to_list_to_order(page, NULL, ret); } void deferred_split_folio(struct folio *folio); @@ -484,6 +501,12 @@ static inline int split_huge_page(struct page *page) { return 0; } + +static inline int split_folio_to_list(struct folio *folio, struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) @@ -598,7 +621,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } -#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) -#define split_folio(f) split_folio_to_order(f, 0) - #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From a0bbe77fafbc7e5eb41fbf3dc5cdb3608d8778a3 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:00 +0200 Subject: dt-bindings: soc: fsl: cpm_qe: Add QUICC Engine (QE) TSA controller Add support for the time slot assigner (TSA) available in some PowerQUICC SoC that uses a QUICC Engine (QE) block such as MPC8321. This QE TSA is similar to the CPM TSA except that it uses UCCs (Unified Communication Controllers) instead of SCCs (Serial Communication Controllers). 
Also, compared against the CPM TSA, this QE TSA can handle up to 4 TDMs instead of 2 and allows to configure the logic level of sync signals. Signed-off-by: Herve Codina Reviewed-by: Rob Herring (Arm) Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-8-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/dt-bindings/soc/qe-fsl,tsa.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/soc/qe-fsl,tsa.h (limited to 'include') diff --git a/include/dt-bindings/soc/qe-fsl,tsa.h b/include/dt-bindings/soc/qe-fsl,tsa.h new file mode 100644 index 000000000000..3cf3df9c0968 --- /dev/null +++ b/include/dt-bindings/soc/qe-fsl,tsa.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef __DT_BINDINGS_SOC_FSL_QE_TSA_H +#define __DT_BINDINGS_SOC_FSL_QE_TSA_H + +#define FSL_QE_TSA_NU 0 +#define FSL_QE_TSA_UCC1 1 +#define FSL_QE_TSA_UCC2 2 +#define FSL_QE_TSA_UCC3 3 +#define FSL_QE_TSA_UCC4 4 +#define FSL_QE_TSA_UCC5 5 + +#endif -- cgit v1.2.3 From c6f39c7c165fce8fe1a41327da02dcca0a3cac25 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:25 +0200 Subject: soc: fsl: qe: Add resource-managed muram allocators Introduce devm_cpm_muram_alloc() and devm_cpm_muram_alloc_fixed(), the resource-managed version of cpm_muram_alloc and cpm_muram_alloc_fixed(). These resource-managed versions simplify the user avoiding the need to call cpm_muram_free(). Indeed, the allocated area returned by these functions will be automatically freed on driver detach. Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-33-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/soc/fsl/qe/qe.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index af793f2a0ec4..629835b6c71d 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -23,6 +23,8 @@ #include #include +struct device; + #define QE_NUM_OF_SNUM 256 /* There are 256 serial number in QE */ #define QE_NUM_OF_BRGS 16 #define QE_NUM_OF_PORTS 1024 @@ -93,8 +95,12 @@ int cpm_muram_init(void); #if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) s32 cpm_muram_alloc(unsigned long size, unsigned long align); +s32 devm_cpm_muram_alloc(struct device *dev, unsigned long size, + unsigned long align); void cpm_muram_free(s32 offset); s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); +s32 devm_cpm_muram_alloc_fixed(struct device *dev, unsigned long offset, + unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); @@ -106,6 +112,12 @@ static inline s32 cpm_muram_alloc(unsigned long size, return -ENOSYS; } +static inline s32 devm_cpm_muram_alloc(struct device *dev, unsigned long size, + unsigned long align) +{ + return -ENOSYS; +} + static inline void cpm_muram_free(s32 offset) { } @@ -116,6 +128,13 @@ static inline s32 cpm_muram_alloc_fixed(unsigned long offset, return -ENOSYS; } +static inline s32 devm_cpm_muram_alloc_fixed(struct device *dev, + unsigned long offset, + unsigned long size) +{ + return -ENOSYS; +} + static inline void __iomem *cpm_muram_addr(unsigned long offset) { return NULL; @@ -172,7 +191,6 @@ static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; } /* * Pin multiplexing 
functions. */ -struct device; struct qe_pin; #ifdef CONFIG_QE_GPIO extern struct qe_pin *qe_pin_request(struct device *dev, int index); @@ -233,7 +251,9 @@ static inline int qe_alive_during_sleep(void) /* we actually use cpm_muram implementation, define this for convenience */ #define qe_muram_init cpm_muram_init #define qe_muram_alloc cpm_muram_alloc +#define devm_qe_muram_alloc devm_cpm_muram_alloc #define qe_muram_alloc_fixed cpm_muram_alloc_fixed +#define devm_qe_muram_alloc_fixed devm_cpm_muram_alloc_fixed #define qe_muram_free cpm_muram_free #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset -- cgit v1.2.3 From f68cd02d51a65594341168f03f7962e9d9540726 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:11:26 +0200 Subject: soc: fsl: qe: Add missing PUSHSCHED command The PUSHSCHED command is missing in the QE header file. This command is supported on MPC8321 and is used to modify the start address for the task running on a given peripheral. It is needed for the QMC in order to perform the re-initialization procedure and so, ensure the correct UCC setup in that case. Simply add the missing command in the commands list available in the QE header file. Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-34-herve.codina@bootlin.com Signed-off-by: Christophe Leroy --- include/soc/fsl/qe/qe.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 629835b6c71d..8f967d15e479 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -469,6 +469,7 @@ enum comm_dir { #define QE_QMC_STOP_TX 0x0000000c #define QE_QMC_STOP_RX 0x0000000d #define QE_SS7_SU_FIL_RESET 0x0000000e +#define QE_PUSHSCHED 0x0000000f /* jonathbr added from here down for 83xx */ #define QE_RESET_BCS 0x0000000a #define QE_MCC_INIT_TX_RX_16 0x00000003 -- cgit v1.2.3 From f46a6e16519712651e2304da03ec144cc0bb084c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 30 Aug 2024 18:26:28 +0300 Subject: usb: Add tunnel_mode parameter to usb device structure Add 'tunnel_mode' enum to usb device structure to describe if a USB3 link is tunneled over USB4, or connected directly using native USB2/USB3 protocols. Tunneled devices depend on USB4 NHI host to maintain the tunnel. Knowledge about tunneled devices is important to ensure correct suspend and resume order between USB4 hosts and tunneled devices. i.e. make sure tunnel is up before the USB device using it resumes. USB hosts such as xHCI may have vendor specific ways to detect tunneled connections. This 'tunnel_mode' parameter can be set by USB3 host driver during hcd->driver->update_device(hcd, udev) callback. tunnel_mode can be set to: USB_LINK_UNKNOWN = 0 USB_LINK_NATIVE USB_LINK_TUNNELED USB_LINK_UNKNOWN is used in case host is not capable of detecting tunneled links. 
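A hedged sketch of how a host controller driver could fill this in from its update_device callback (example_link_is_tunneled() is a made-up helper; the detection itself is vendor specific):

        static int example_update_device(struct usb_hcd *hcd, struct usb_device *udev)
        {
                if (example_link_is_tunneled(hcd, udev))
                        udev->tunnel_mode = USB_LINK_TUNNELED;
                else
                        udev->tunnel_mode = USB_LINK_NATIVE;

                return 0;
        }

Hosts that cannot tell simply leave udev->tunnel_mode as USB_LINK_UNKNOWN.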
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240830152630.3943215-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 832997a9da0a..672d8fc2abdb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -495,6 +495,12 @@ struct usb_dev_state; struct usb_tt; +enum usb_link_tunnel_mode { + USB_LINK_UNKNOWN = 0, + USB_LINK_NATIVE, + USB_LINK_TUNNELED, +}; + enum usb_port_connect_type { USB_PORT_CONNECT_TYPE_UNKNOWN = 0, USB_PORT_CONNECT_TYPE_HOT_PLUG, @@ -605,6 +611,7 @@ struct usb3_lpm_parameters { * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed + * @tunnel_mode: Connection native or tunneled over USB4 * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -714,6 +721,7 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + enum usb_link_tunnel_mode tunnel_mode; int slot_id; struct usb2_lpm_parameters l1_params; -- cgit v1.2.3 From d9c61bb33fbbcbc2d5f69efa10db116dab4b56cc Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Sun, 1 Sep 2024 21:11:16 +0200 Subject: usb: gadget: function: move u_f.h to include/linux/usb/func_utils.h We move the func_utils.h header to include/linux/usb to be able to compile function drivers outside of the drivers/usb/gadget/function directory. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240116-ml-topic-u9p-v12-1-9a27de5160e0@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/func_utils.h | 86 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 include/linux/usb/func_utils.h (limited to 'include') diff --git a/include/linux/usb/func_utils.h b/include/linux/usb/func_utils.h new file mode 100644 index 000000000000..c8795c965109 --- /dev/null +++ b/include/linux/usb/func_utils.h @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * func_utils.h + * + * Utility definitions for USB functions + * + * Copyright (c) 2013 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com + * + * Author: Andrzej Pietrasiewicz + */ + +#ifndef _FUNC_UTILS_H_ +#define _FUNC_UTILS_H_ + +#include +#include + +/* Variable Length Array Macros **********************************************/ +#define vla_group(groupname) size_t groupname##__next = 0 +#define vla_group_size(groupname) groupname##__next + +#define vla_item(groupname, type, name, n) \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + size_t size = array_size(n, sizeof(type)); \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, size, \ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_item_with_sz(groupname, type, name, n) \ + size_t groupname##_##name##__sz = array_size(n, sizeof(type)); \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, groupname##_##name##__sz,\ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_ptr(ptr, groupname, name) \ + ((void *) ((char *)ptr + groupname##_##name##__offset)) + +struct usb_ep; +struct usb_request; + +/** + * alloc_ep_req - returns a usb_request allocated by the gadget driver and + * allocates the request's buffer. + * + * @ep: the endpoint to allocate a usb_request + * @len: usb_requests's buffer suggested size + * + * In case @ep direction is OUT, the @len will be aligned to ep's + * wMaxPacketSize. In order to avoid memory leaks or drops, *always* use + * usb_requests's length (req->length) to refer to the allocated buffer size. + * Requests allocated via alloc_ep_req() *must* be freed by free_ep_req(). + */ +struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len); + +/* Frees a usb_request previously allocated by alloc_ep_req() */ +static inline void free_ep_req(struct usb_ep *ep, struct usb_request *req) +{ + WARN_ON(req->buf == NULL); + kfree(req->buf); + req->buf = NULL; + usb_ep_free_request(ep, req); +} + +#endif /* _FUNC_UTILS_H_ */ -- cgit v1.2.3 From eaf9b2c875ece22768b78aa38da8b232e5de021b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 28 Aug 2024 11:34:02 +0200 Subject: netfilter: nf_tables: drop unused 3rd argument from validate callback ops Since commit a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") the validate() callback no longer needs the return pointer argument. 
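With the unused pointer gone, a typical validate callback for a hypothetical expression reduces to a plain hook check, e.g.:

        static int nft_example_validate(const struct nft_ctx *ctx,
                                        const struct nft_expr *expr)
        {
                return nft_chain_validate_hooks(ctx->chain,
                                                (1 << NF_INET_LOCAL_IN) |
                                                (1 << NF_INET_FORWARD));
        }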
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +-- include/net/netfilter/nft_fib.h | 4 +--- include/net/netfilter/nft_meta.h | 3 +-- include/net/netfilter/nft_reject.h | 3 +-- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1cc33d946d41..7cd60ea7cdee 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -961,8 +961,7 @@ struct nft_expr_ops { const struct nft_expr *expr, bool reset); int (*validate)(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); bool (*reduce)(struct nft_regs_track *track, const struct nft_expr *expr); bool (*gc)(struct net *net, diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 167640b843ef..38cae7113de4 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -21,9 +21,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr); void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index ba1238f12a48..d602263590fe 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -41,8 +41,7 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr); int nft_meta_set_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); bool nft_meta_get_reduce(struct nft_regs_track *track, const struct nft_expr *expr); diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 6d9ba62efd75..19060212988a 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -15,8 +15,7 @@ struct nft_reject { extern const struct nla_policy nft_reject_policy[]; int nft_reject_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); + const struct nft_expr *expr); int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, -- cgit v1.2.3 From 85dfb34bb7d24c4585fa6a8186c3bf207470ecd7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 28 Aug 2024 21:30:16 +0100 Subject: netfilter: nf_tables: Correct spelling in nf_tables.h Correct spelling in nf_tables.h. As reported by codespell. 
Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7cd60ea7cdee..02626ad17e99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -687,7 +687,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element - * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element + * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ -- cgit v1.2.3 From c362646b6fc15009219020c443f5256190be6436 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 30 Aug 2024 18:23:00 +0100 Subject: netfilter: nf_tables: Add missing Kernel doc - Add missing documentation of struct field and enum items. - Add missing documentation of function parameter. Flagged by ./scripts/kernel-doc -none. No functional change intended. Compile tested only. Signed-off-by: Simon Horman Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/net/netfilter/nf_tproxy.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 02626ad17e99..c302b396e1a7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -209,6 +209,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, * @family: protocol family * @level: depth of the chains * @report: notify via unicast netlink message + * @reg_inited: bitmap of initialised registers */ struct nft_ctx { struct net *net; @@ -313,6 +314,7 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) /** * enum nft_iter_type - nftables set iterator type * + * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state */ diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index faa108b1ba67..5adf6fda11e8 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -36,6 +36,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); /** * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections + * @net: The network namespace. * @skb: The skb being processed. * @laddr: IPv4 address to redirect to or zero. * @lport: TCP port to redirect to or zero. -- cgit v1.2.3 From beb5a9bea8239cdf4adf6b62672e30db3e9fa5ce Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:36 +0200 Subject: netdevice: convert private flags > BIT(31) to bitfields Make dev->priv_flags `u32` back and define bits higher than 31 as bitfield booleans as per Jakub's suggestion. This simplifies code which accesses these bits with no optimization loss (testb both before/after), allows to not extend &netdev_priv_flags each time, but also scales better as bits > 63 in the future would only add a new u64 to the structure with no complications, comparing to that extending ::priv_flags would require converting it to a bitmap. 
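For the two flags that lived above bit 31, the conversion for their users is mechanical; an illustrative before/after (driver-side sketch):

        /* was: dev->priv_flags |= IFF_SEE_ALL_HWTSTAMP_REQUESTS; */
        dev->see_all_hwtstamp_requests = true;

        /* was: if (dev->priv_flags & IFF_CHANGE_PROTO_DOWN) */
        if (dev->change_proto_down)
                netdev_info(dev, "carrier can be toggled via IFLA_PROTO_DOWN\n");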
Note that I picked `unsigned long :1` to not lose any potential optimizations comparing to `bool :1` etc. Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fce70990b209..d6f35c9d8580 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1613,7 +1613,8 @@ struct net_device_ops { * userspace; this means that the order of these flags can change * during any kernel release. * - * You should have a pretty good reason to be extending these flags. + * You should add bitfield booleans after either net_device::priv_flags + * (hotpath) or ::threaded (slowpath) instead of extending these flags. * * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device @@ -1652,10 +1653,6 @@ struct net_device_ops { * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) - * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN - * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to - * ndo_hwtstamp_set() for all timestamp requests regardless of source, - * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1690,8 +1687,6 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), - IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; /* Specifies the type of the struct net_device::ml_priv pointer */ @@ -1723,6 +1718,9 @@ enum netdev_reg_state { * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * + * @priv_flags: flags invisible to userspace defined as bits, see + * enum netdev_priv_flags for the definitions + * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. @@ -1789,8 +1787,6 @@ enum netdev_reg_state { * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device - * @priv_flags: Like 'flags' but invisible to userspace, - * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data @@ -1964,6 +1960,12 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @see_all_hwtstamp_requests: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests + * regardless of source, even if those aren't + * HWTSTAMP_SOURCE_NETDEV + * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. 
@@ -2014,7 +2016,9 @@ struct net_device { /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); - unsigned long long priv_flags; + struct_group(priv_flags_fast, + unsigned long priv_flags:32; + ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; @@ -2350,6 +2354,10 @@ struct net_device { bool proto_down; bool threaded; + /* priv_flags_slow, ungrouped to save space */ + unsigned long see_all_hwtstamp_requests:1; + unsigned long change_proto_down:1; + struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) -- cgit v1.2.3 From 00d066a4d4edbe559ba6c35153da71d4b2b8a383 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:37 +0200 Subject: netdev_features: convert NETIF_F_LLTX to dev->lltx NETIF_F_LLTX can't be changed via Ethtool and is not a feature, rather an attribute, very similar to IFF_NO_QUEUE (and hot). Free one netdev_features_t bit and make it a "hot" private flag. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 10 +++++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7c2d77d75a88..a2e20b517584 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -24,8 +24,7 @@ enum { NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ - NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ + __UNUSED_NETIF_F_12, NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -120,7 +119,6 @@ enum { #define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) -#define NETIF_F_LLTX __NETIF_F(LLTX) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) #define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) @@ -193,7 +191,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) + NETIF_F_NETNS_LOCAL) /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d6f35c9d8580..e22ca5e07490 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1720,6 +1720,9 @@ enum netdev_reg_state { * * @priv_flags: flags invisible to userspace defined as bits, see * enum netdev_priv_flags for the definitions + * @lltx: device supports lockless Tx. Deprecated for real HW + * drivers. Mainly used by logical interfaces, such as + * bonding and tunnels * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). 
It is the name @@ -2018,6 +2021,7 @@ struct net_device { __cacheline_group_begin(net_device_read_tx); struct_group(priv_flags_fast, unsigned long priv_flags:32; + unsigned long lltx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; @@ -4433,7 +4437,7 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_LOCK(dev, txq, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ @@ -4441,12 +4445,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_TRYLOCK(dev, txq) \ - (((dev->features & NETIF_F_LLTX) == 0) ? \ + (!(dev)->lltx ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ -- cgit v1.2.3 From 05c1280a2bcfca187fe7fa90bb240602cf54af0a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:38 +0200 Subject: netdev_features: convert NETIF_F_NETNS_LOCAL to dev->netns_local "Interface can't change network namespaces" is rather an attribute, not a feature, and it can't be changed via Ethtool. Make it a "cold" private flag instead of a netdev_feature and free one more bit. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a2e20b517584..d5a3836f4793 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -25,7 +25,7 @@ enum { NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. 
*/ __UNUSED_NETIF_F_12, - NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ + __UNUSED_NETIF_F_13, NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -121,7 +121,6 @@ enum { #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) -#define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) #define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) @@ -190,8 +189,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_NETNS_LOCAL) +#define NETIF_F_NEVER_CHANGE NETIF_F_VLAN_CHALLENGED /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e22ca5e07490..a698e2402420 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1968,6 +1968,7 @@ enum netdev_reg_state { * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * @netns_local: interface can't change network namespaces * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2361,6 +2362,7 @@ struct net_device { /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; + unsigned long netns_local:1; struct list_head net_notifier_list; -- cgit v1.2.3 From 782dbbf589cd9082effaec522e3f1b4ce1594803 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:39 +0200 Subject: netdev_features: convert NETIF_F_FCOE_MTU to dev->fcoe_mtu Ability to handle maximum FCoE frames of 2158 bytes can never be changed and thus more of an attribute, not a toggleable feature. Move it from netdev_features_t to "cold" priv flags (bitfield bool) and free yet another feature bit. 
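A minimal sketch of the driver-side conversion this implies, assuming an FCoE-capable driver's netdev setup path (the actual .c changes are outside this 'include'-only excerpt):

	/* before: advertised through the feature bitmap */
	/* netdev->features |= NETIF_F_FCOE_MTU; */

	/* after: recorded once as a "cold" private flag */
	netdev->fcoe_mtu = true;

Read sides correspondingly go from testing NETIF_F_FCOE_MTU in netdev->features to testing netdev->fcoe_mtu directly.
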
Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d5a3836f4793..37af2c6e7caf 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -58,7 +58,7 @@ enum { NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ + __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -105,7 +105,6 @@ enum { #define __NETIF_F(name) __NETIF_F_BIT(NETIF_F_##name##_BIT) #define NETIF_F_FCOE_CRC __NETIF_F(FCOE_CRC) -#define NETIF_F_FCOE_MTU __NETIF_F(FCOE_MTU) #define NETIF_F_FRAGLIST __NETIF_F(FRAGLIST) #define NETIF_F_FSO __NETIF_F(FSO) #define NETIF_F_GRO __NETIF_F(GRO) @@ -210,8 +209,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ - NETIF_F_FSO) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FSO) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a698e2402420..ca5f0dda733b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,6 +1969,7 @@ enum netdev_reg_state { * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN * @netns_local: interface can't change network namespaces + * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2363,6 +2364,7 @@ struct net_device { unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; unsigned long netns_local:1; + unsigned long fcoe_mtu:1; struct list_head net_notifier_list; -- cgit v1.2.3 From a61fec1c87be682b5c0fdba6074649c469c675a3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:40 +0200 Subject: netdev_features: remove NETIF_F_ALL_FCOE NETIF_F_ALL_FCOE is used only in vlan_dev.c, 2 times. Now that it's only 2 bits, open-code it and remove the definition from netdev_features.h. Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/linux/netdev_features.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 37af2c6e7caf..66e7d26b70a4 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -209,8 +209,6 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FSO) - /* List of features with software fallbacks. 
*/ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) -- cgit v1.2.3 From 2c9ffe872e64f5aec12f2ff6cec1b7542569a88f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 3 Sep 2024 09:21:44 +0100 Subject: wifi: cfg80211: wext: Update spelling and grammar Correct spelling in iw_handler.h. As reported by codespell. Also, while the "few shortcomings" line is being updated, correct its grammar. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240903-wifi-spell-v2-1-bfcf7062face@kernel.org Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index b2cf243ebe44..7af1082ea9a0 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -23,7 +23,7 @@ * to handle wireless statistics. * * The initial APIs served us well and has proven a reasonably good design. - * However, there is a few shortcommings : + * However, there are a few shortcomings : * o No events, everything is a request to the driver. * o Large ioctl function in driver with gigantic switch statement * (i.e. spaghetti code). @@ -38,13 +38,13 @@ * ------------------------------- * The new driver API is just a bunch of standard functions (handlers), * each handling a specific Wireless Extension. The driver just export - * the list of handler it supports, and those will be called apropriately. + * the list of handler it supports, and those will be called appropriately. * * I tried to keep the main advantage of the previous API (simplicity, * efficiency and light weight), and also I provide a good dose of backward * compatibility (most structures are the same, driver can use both API * simultaneously, ...). - * Hopefully, I've also addressed the shortcomming of the initial API. + * Hopefully, I've also addressed the shortcoming of the initial API. * * The advantage of the new API are : * o Handling of Extensions in driver broken in small contained functions @@ -84,7 +84,7 @@ /* ---------------------- THE IMPLEMENTATION ---------------------- */ /* - * Some of the choice I've made are pretty controversials. Defining an + * Some of the choice I've made are pretty controversial. Defining an * API is very much weighting compromises. This goes into some of the * details and the thinking behind the implementation. * @@ -140,7 +140,7 @@ * example to distinguish setting max rate and basic rate), I would * break the prototype. Using iwreq_data is more flexible. * 3) Also, the above form is not generic (see above). - * 4) I don't expect driver developper using the wrong field of the + * 4) I don't expect driver developer using the wrong field of the * union (Doh !), so static typechecking doesn't add much value. 
* 5) Lastly, you can skip the union by doing : * static int mydriver_ioctl_setrate(struct net_device *dev, @@ -459,7 +459,7 @@ int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info, void wireless_spy_update(struct net_device *dev, unsigned char *address, struct iw_quality *wstats); -/************************* INLINE FUNTIONS *************************/ +/************************* INLINE FUNCTIONS *************************/ /* * Function that are so simple that it's more efficient inlining them */ -- cgit v1.2.3 From b45ed06f46737f8c2ee65698f4305409f2386674 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 13 Aug 2024 22:19:32 +0800 Subject: drivers/base: Introduce device_match_t for device finding APIs There are several drivers/base APIs for finding a specific device, and they currently use the following good type for the @match parameter: int (*match)(struct device *dev, const void *data) Since these operations do not modify the caller-provided @*data, this type is worthy of a dedicated typedef: typedef int (*device_match_t)(struct device *dev, const void *data) Advantages of using device_match_t: - Shorter API declarations and definitions - Prevent further APIs from using a bad type for @match So introduce device_match_t and apply it to the existing (bus|class|driver|auxiliary)_find_device() APIs. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240813-dev_match_api-v3-1-6c6878a99b9f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 2 +- include/linux/device/bus.h | 6 ++++-- include/linux/device/class.h | 2 +- include/linux/device/driver.h | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 662b8ae54b6a..31762324bcc9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -271,6 +271,6 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); struct auxiliary_device *auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); #endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 807831d6bf0f..cdc4757217f9 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -126,6 +126,9 @@ struct bus_attribute { int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); +/* Matching function type for drivers/base APIs to find a specific device */ +typedef int (*device_match_t)(struct device *dev, const void *data); + /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); int device_match_of_node(struct device *dev, const void *np); @@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused); int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(const struct bus_type *bus, struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)); + const void *data, device_match_t match); /** * bus_find_device_by_name - device iterator for locating a particular device * of a specific name. 
diff --git a/include/linux/device/class.h b/include/linux/device/class.h index c576b49c55c2..518c9c83d64b 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter); int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *class_find_device(const struct class *class, const struct device *start, - const void *data, int (*match)(struct device *, const void *)); + const void *data, device_match_t match); /** * class_find_device_by_name - device iterator for locating a particular device diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 1fc8b68786de..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device void *data, int (*fn)(struct device *dev, void *)); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); /** * driver_find_device_by_name - device iterator for locating a particular device -- cgit v1.2.3 From 24e041e1e48d06f25a12caaf73728a4ec2e511fe Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 15:55:44 +0800 Subject: platform: Make platform_bus_type constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the platform_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240823075544.144426-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d422db6eec63..7132623e4658 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,7 +52,7 @@ struct platform_device { extern int platform_device_register(struct platform_device *); extern void platform_device_unregister(struct platform_device *); -extern struct bus_type platform_bus_type; +extern const struct bus_type platform_bus_type; extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, -- cgit v1.2.3 From 8064952c65045f05ee2671fe437770e50c151776 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 22 Aug 2024 15:28:04 -0500 Subject: driver core: shut down devices asynchronously Add code to allow asynchronous shutdown of devices, ensuring that each device is shut down before its parents & suppliers. Only devices with drivers that have async_shutdown_enable enabled will be shut down asynchronously. This can dramatically reduce system shutdown/reboot time on systems that have multiple devices that take many seconds to shut down (like certain NVMe drives). On one system tested, the shutdown time went from 11 minutes without this patch to 55 seconds with the patch. 
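A hedged opt-in sketch using a made-up PCI driver; only the new async_shutdown_enable flag comes from this patch, everything else is illustrative:

	static struct pci_driver example_ssd_driver = {
		.name		= "example_ssd",
		.id_table	= example_ssd_ids,
		.probe		= example_ssd_probe,
		.remove		= example_ssd_remove,
		.shutdown	= example_ssd_shutdown,
		.driver = {
			/* shut down this driver's devices asynchronously */
			.async_shutdown_enable = true,
		},
	};

Devices whose drivers leave the flag clear keep the existing synchronous shutdown ordering.
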
Signed-off-by: Stuart Hayes Signed-off-by: David Jeffery Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Tested-by: Keith Busch Link: https://lore.kernel.org/r/20240822202805.6379-4-stuart.w.hayes@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..14c9211b82d6 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,6 +56,7 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. + * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -102,6 +103,7 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; + bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; -- cgit v1.2.3 From 8ab0f4605d5ce3c0d386b3828b07719f1e8e0505 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 23 Aug 2024 14:24:40 +0800 Subject: bus: fsl-mc: make fsl_mc_bus_type const Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the fsl_mc_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Acked-by: Christophe Leroy # for Link: https://lore.kernel.org/r/20240823062440.113628-1-kunwu.chan@linux.dev Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 083c860fd28e..c90ec889bfc2 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, u16 if_id); -extern struct bus_type fsl_mc_bus_type; +extern const struct bus_type fsl_mc_bus_type; extern struct device_type fsl_mc_bus_dprc_type; extern struct device_type fsl_mc_bus_dpni_type; -- cgit v1.2.3 From 4c0a6a0ac902dbf184ae7be1f1fce225cc0b7380 Mon Sep 17 00:00:00 2001 From: Chanwoo Lee Date: Thu, 29 Aug 2024 11:47:09 +0900 Subject: mmc: core: Replace the argument of mmc_sd_switch() with defines Replace with already defined values for readability. While at it, let's also change the mode-parameter from an int to bool, as the only used values are 0 or 1. 
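A hedged call-site sketch of what the new prototype implies; the macro and variable names below are assumptions, since the defines and the callers live outside this 'include'-only hunk:

	/* before: bare magic number selecting check (0) vs. set (1) mode */
	/* err = mmc_sd_switch(card, 1, 0, bus_speed, status); */

	/* after: mode is a bool chosen via a named define, e.g. something
	 * along the lines of SD_SWITCH_SET / SD_SWITCH_CHECK (names assumed)
	 */
	err = mmc_sd_switch(card, SD_SWITCH_SET, 0, bus_speed, status);
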
Signed-off-by: Chanwoo Lee Link: https://lore.kernel.org/r/20240829024709.402285-1-cw9316.lee@samsung.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ac01cd1622ef..6a31ed02d3ff 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -648,7 +648,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } -int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp); +int mmc_sd_switch(struct mmc_card *card, bool mode, int group, + u8 value, u8 *resp); int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); -- cgit v1.2.3 From 7df7c204c678e24cd32d33360538670b7b90e330 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 22 Aug 2024 15:50:18 +0200 Subject: xfs: enable block size larger than page size support Page cache now has the ability to have a minimum order when allocating a folio which is a prerequisite to add support for block size > page size. Signed-off-by: Pankaj Raghav Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240827-xfs-fix-wformat-bs-gt-ps-v1-1-aec6717609e0@kernel.org # fix folded Link: https://lore.kernel.org/r/20240822135018.1931258-11-kernel@pankajraghav.com Reviewed-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4cc170949e9c..55b254d951da 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -374,6 +374,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) +/* + * mapping_max_folio_size_supported() - Check the max folio size supported + * + * The filesystem should call this function at mount time if there is a + * requirement on the folio mapping size in the page cache. + */ +static inline size_t mapping_max_folio_size_supported(void) +{ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER); + return PAGE_SIZE; +} + /* * mapping_set_folio_order_range() - Set the orders supported by a file. * @mapping: The address space of the file. -- cgit v1.2.3 From 31754ea6cbbc08d5bbe1fa290320c3048d8d98a3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 27 Aug 2024 06:51:36 -0400 Subject: iomap: add a private argument for iomap_file_buffered_write In order to switch fuse over to using iomap for buffered writes we need to be able to have the struct file for the original write, in case we have to read in the page to make it uptodate. Handle this by using the existing private field in the iomap_iter, and add the argument to iomap_file_buffered_write. This will allow us to pass the file in through the iomap buffered write path, and is flexible for any other file systems needs. 
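A sketch of the resulting call-site shape, assuming a conventional ->write_iter() implementation with illustrative names; existing iomap users would simply pass NULL, while fuse could pass something that lets its iomap_begin reach the original struct file:

	static ssize_t example_file_write_iter(struct kiocb *iocb,
					       struct iov_iter *from)
	{
		/* classic filesystem: no per-write private data needed */
		return iomap_file_buffered_write(iocb, from,
						 &example_iomap_ops, NULL);
	}

The pointer handed in here lands in the iomap iterator's private field, where the filesystem's iomap operations can pick it up.
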
Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/7f55c7c32275004ba00cddf862d970e6e633f750.1724755651.git.josef@toxicpanda.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6fc1c858013d..f792b37f7627 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -257,7 +257,7 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - const struct iomap_ops *ops); + const struct iomap_ops *ops, void *private); int iomap_file_buffered_write_punch_delalloc(struct inode *inode, struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, int (*punch)(struct inode *inode, loff_t pos, loff_t length)); -- cgit v1.2.3 From 6f634eb080161baa4811a39f5ec07923ede092bc Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 27 Aug 2024 10:42:07 +0200 Subject: filemap: fix htmldoc warning for mapping_align_index() Stephen reported that there is a kernel build warning due to a missing description of a parameter in mapping_align_index(). Add the missing index parameter in the comment description. Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20240827084206.106347-2-kernel@pankajraghav.com Fixes: ab95d23bab22 ("filemap: allocate mapping_min_order folios in the page cache") Reported-by: Stephen Rothwell Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 55b254d951da..6c0dde447c98 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -470,6 +470,7 @@ mapping_min_folio_nrpages(struct address_space *mapping) /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. + * @index: The page index. * * The index of a folio must be naturally aligned. If you are adding a * new folio to the page cache and need to know what index to give it, -- cgit v1.2.3 From 4686cc598f669dea1b50dde1568e6c65c355bc67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2024 00:25:51 +0200 Subject: sched: Clean up DL server vs core sched Abide by the simple rule: pick_next_task() := pick_task() + set_next_task(.first = true) This allows us to trivially get rid of server_pick_next() and things collapse nicely. Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240813224015.837303391@infradead.org --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3709dedbab59..57cf27a3045c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -692,7 +692,6 @@ struct sched_dl_entity { */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; - dl_server_pick_f server_pick_next; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES -- cgit v1.2.3 From 73d3c04b710f0c144ce873dfe4f173a55c749539 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:08:58 +0200 Subject: netfilter: nf_tables: annotate data-races around element expiration element expiration can be read-write locklessly, it can be written by dynset and read from netlink dump, add annotation. 
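For context, a sketch of the writer side these READ_ONCE() annotations pair with; the dynset update itself is outside this 'include'-only diff, so the exact expression and the timeout variable are assumptions:

	/* dynset refresh path: publish the new expiry without the set
	 * lock held, paired with READ_ONCE() on the netlink dump side.
	 */
	WRITE_ONCE(*nft_set_ext_expiration(ext),
		   get_jiffies_64() + timeout);
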
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c302b396e1a7..1528af3fe26f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -835,7 +835,7 @@ static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, READ_ONCE(*nft_set_ext_expiration(ext))); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) -- cgit v1.2.3 From 4c5daea9af4fce6628b8ca9e6a332529bbf26809 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:17 +0200 Subject: netfilter: nf_tables: consolidate timeout extension for elements Expiration and timeout are stored in separated set element extensions, but they are tightly coupled. Consolidate them in a single extension to simplify and prepare for set element updates. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1528af3fe26f..1e9b5e1659a1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -687,7 +687,6 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout - * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element @@ -699,7 +698,6 @@ enum nft_set_extensions { NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, - NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPRESSIONS, NFT_SET_EXT_OBJREF, @@ -811,14 +809,14 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } -static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) -{ - return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); -} +struct nft_timeout { + u64 timeout; + u64 expiration; +}; -static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext) +static inline struct nft_timeout *nft_set_ext_timeout(const struct nft_set_ext *ext) { - return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); } static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) @@ -834,8 +832,8 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { - return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_after_eq64(tstamp, READ_ONCE(*nft_set_ext_expiration(ext))); + return nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) -- cgit v1.2.3 From 8bfb74ae12fa4cd3c9b49bb5913610b5709bffd7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:27 +0200 Subject: netfilter: nf_tables: zero timeout means element never 
times out This patch uses zero as timeout marker for those elements that never expire when the element is created. If userspace provides no timeout for an element, then the default set timeout applies. However, if no default set timeout is specified and timeout flag is set on, then timeout extension is allocated and timeout is set to zero to allow for future updates. Use of zero a never timeout marker has been suggested by Phil Sutter. Note that, in older kernels, it is already possible to define elements that never expire by declaring a set with the set timeout flag set on and no global set timeout, in this case, new element with no explicit timeout never expire do not allocate the timeout extension, hence, they never expire. This approach makes it complicated to accomodate element timeout update, because element extensions do not support reallocations. Therefore, allocate the timeout extension and use the new marker for this case, but do not expose it to userspace to retain backward compatibility in the set listing. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++-- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1e9b5e1659a1..7511918dce6f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -832,8 +832,11 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { - return nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && - time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); + if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) || + nft_set_ext_timeout(ext)->timeout == 0) + return false; + + return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 639894ed1b97..d6476ca5d7a6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -436,7 +436,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) - * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_TIMEOUT: timeout value, zero means never times out (NLA_U64) * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) -- cgit v1.2.3 From 4201f3938914d8df3c761754b9726770c4225d66 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:09:58 +0200 Subject: netfilter: nf_tables: set element timeout update support Store new timeout and expiration in transaction object, use them to update elements from .commit path. Otherwise, discard update if .abort path is exercised. Use update_flags in the transaction to note whether the timeout, expiration, or both need to be updated. Annotate access to timeout extension now that it can be updated while lockless read access is possible. Reject timeout updates on elements with no timeout extension. Element transaction remains in the 96 bytes kmalloc slab on x86_64 after this update. 
This patch requires ("netfilter: nf_tables: use timestamp to check for set element timeout") to make sure an element does not expire while transaction is ongoing. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7511918dce6f..49708e7e1339 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -833,7 +833,7 @@ static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) || - nft_set_ext_timeout(ext)->timeout == 0) + READ_ONCE(nft_set_ext_timeout(ext)->timeout) == 0) return false; return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); @@ -1749,10 +1749,18 @@ struct nft_trans_table { #define nft_trans_table_update(trans) \ nft_trans_container_table(trans)->update +enum nft_trans_elem_flags { + NFT_TRANS_UPD_TIMEOUT = (1 << 0), + NFT_TRANS_UPD_EXPIRATION = (1 << 1), +}; + struct nft_trans_elem { struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; + u64 timeout; + u64 expiration; + u8 update_flags; bool bound; }; @@ -1762,6 +1770,12 @@ struct nft_trans_elem { nft_trans_container_elem(trans)->set #define nft_trans_elem_priv(trans) \ nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_update_flags(trans) \ + nft_trans_container_elem(trans)->update_flags +#define nft_trans_elem_timeout(trans) \ + nft_trans_container_elem(trans)->timeout +#define nft_trans_elem_expiration(trans) \ + nft_trans_container_elem(trans)->expiration #define nft_trans_elem_set_bound(trans) \ nft_trans_container_elem(trans)->bound -- cgit v1.2.3 From 631598c419985327110135efb696d9f9fe67bffd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 14 Aug 2024 11:26:29 +0200 Subject: iio: dac: ad5449: drop support for platform data There are no longer any users of the platform data struct. Remove support for it from the driver. Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20240814092629.9862-1-brgl@bgdev.pl Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad5449.h | 39 ------------------------------------ 1 file changed, 39 deletions(-) delete mode 100644 include/linux/platform_data/ad5449.h (limited to 'include') diff --git a/include/linux/platform_data/ad5449.h b/include/linux/platform_data/ad5449.h deleted file mode 100644 index d687ef5726c2..000000000000 --- a/include/linux/platform_data/ad5449.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog - * Converter driver. - * - * Copyright 2012 Analog Devices Inc. - * Author: Lars-Peter Clausen - */ - -#ifndef __LINUX_PLATFORM_DATA_AD5449_H__ -#define __LINUX_PLATFORM_DATA_AD5449_H__ - -/** - * enum ad5449_sdo_mode - AD5449 SDO pin configuration - * @AD5449_SDO_DRIVE_FULL: Drive the SDO pin with full strength. - * @AD5449_SDO_DRIVE_WEAK: Drive the SDO pin with not full strength. - * @AD5449_SDO_OPEN_DRAIN: Operate the SDO pin in open-drain mode. - * @AD5449_SDO_DISABLED: Disable the SDO pin, in this mode it is not possible to - * read back from the device. 
- */ -enum ad5449_sdo_mode { - AD5449_SDO_DRIVE_FULL = 0x0, - AD5449_SDO_DRIVE_WEAK = 0x1, - AD5449_SDO_OPEN_DRAIN = 0x2, - AD5449_SDO_DISABLED = 0x3, -}; - -/** - * struct ad5449_platform_data - Platform data for the ad5449 DAC driver - * @sdo_mode: SDO pin mode - * @hardware_clear_to_midscale: Whether asserting the hardware CLR pin sets the - * outputs to midscale (true) or to zero scale(false). - */ -struct ad5449_platform_data { - enum ad5449_sdo_mode sdo_mode; - bool hardware_clear_to_midscale; -}; - -#endif -- cgit v1.2.3 From 33f339a1ba54e56bba57ee9a77c71e385ab4825c Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Fri, 30 Aug 2024 16:25:17 +0800 Subject: bpf, net: Fix a potential race in do_sock_getsockopt() There's a potential race when `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` is false during the execution of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN`, but becomes true when `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is called. This inconsistency can lead to `BPF_CGROUP_RUN_PROG_GETSOCKOPT` receiving an "-EFAULT" from `__cgroup_bpf_run_filter_getsockopt(max_optlen=0)`. Scenario shown as below: `process A` `process B` ----------- ------------ BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN enable CGROUP_GETSOCKOPT BPF_CGROUP_RUN_PROG_GETSOCKOPT (-EFAULT) To resolve this, remove the `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` macro and directly uses `copy_from_sockptr` to ensure that `max_optlen` is always set before `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is invoked. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Co-developed-by: Yanghui Li Signed-off-by: Yanghui Li Co-developed-by: Cheng-Jui Wang Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Acked-by: Stanislav Fomichev Acked-by: Alexei Starovoitov Link: https://patch.msgid.link/20240830082518.23243-1-Tze-nan.Wu@mediatek.com Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index fb3c3e7181e6..ce91d9b2acb9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -390,14 +390,6 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - copy_from_sockptr(&__ret, optlen, sizeof(int)); \ - __ret; \ -}) - #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ @@ -518,7 +510,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ -- cgit v1.2.3 From 7cb9b5fa218caa899f4288865c4598357bcce4e9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 3 Sep 2024 15:38:25 -0500 Subject: PCI: endpoint: Fix enum pci_epc_bar_type kerneldoc e01c9797c0eb ("PCI: endpoint: Clean up hardware description for BARs") added enum pci_epc_bar_type with incomplete kerneldoc. Add the missing piece. 
Signed-off-by: Bjorn Helgaas --- include/linux/pci-epc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb760..697cc190747d 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -149,6 +149,7 @@ struct pci_epc { }; /** + * enum pci_epc_bar_type - configurability of endpoint BAR * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. * @BAR_FIXED: The BAR mask is fixed by the hardware. * @BAR_RESERVED: The BAR should not be touched by an EPF driver. -- cgit v1.2.3 From 364ea7ccaef917a3068236a19a4b31a0623b561a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 31 Aug 2024 16:20:39 +0200 Subject: power: supply: Change usb_types from an array into a bitmask The bit_types array just hold a list of valid enum power_supply_usb_type values which map to 0 - 9. This can easily be represented as a bitmap. This reduces the size of struct power_supply_desc and further reduces the data section size by drivers no longer needing to store the array. This also unifies how usb_types are handled with charge_behaviours, which allows power_supply_show_usb_type() to be removed. Signed-off-by: Hans de Goede Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240831142039.28830-7-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 72dc7e45c90c..910d407ebe63 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -243,8 +243,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; - const enum power_supply_usb_type *usb_types; - size_t num_usb_types; + u32 usb_types; const enum power_supply_property *properties; size_t num_properties; -- cgit v1.2.3 From 8f62819aaace77dd85037ae766eb767f8c4417ce Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Aug 2024 11:33:23 +0200 Subject: PCI/pwrctl: Rescan bus on a separate thread If we trigger the bus rescan from sysfs, we'll try to lock the PCI rescan mutex recursively and deadlock - the platform device will be populated and probed on the same thread that handles the sysfs write. Add a workqueue to the pwrctl code on which we schedule the rescan for controlled PCI devices. While at it: add a new interface for initializing the pwrctl context where we'd now assign the parent device address and initialize the workqueue. Link: https://lore.kernel.org/r/20240823093323.33450-3-brgl@bgdev.pl Fixes: 4565d2652a37 ("PCI/pwrctl: Add PCI power control core code") Reported-by: Konrad Dybcio Signed-off-by: Bartosz Golaszewski Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- include/linux/pci-pwrctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h index 45e9cfe740e4..0d23dddf59ec 100644 --- a/include/linux/pci-pwrctl.h +++ b/include/linux/pci-pwrctl.h @@ -7,6 +7,7 @@ #define __PCI_PWRCTL_H__ #include +#include struct device; struct device_link; @@ -41,8 +42,10 @@ struct pci_pwrctl { /* Private: don't use. 
*/ struct notifier_block nb; struct device_link *link; + struct work_struct work; }; +void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); int devm_pci_pwrctl_device_set_ready(struct device *dev, -- cgit v1.2.3 From de6c85bf918ea52d5c680f0d130b37ee2ff152d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Aug 2024 09:02:47 +0300 Subject: dma-mapping: clearly mark DMA ops as an architecture feature DMA ops are a helper for architectures and not for drivers to override the DMA implementation. Unfortunately driver authors keep ignoring this. Make the fact more clear by renaming the symbol to ARCH_HAS_DMA_OPS and having the two drivers overriding their dma_ops depend on that. These drivers should probably be marked broken, but we can give them a bit of a grace period for that. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Acked-by: Sakari Ailus # for IPU6 Acked-by: Robin Murphy --- include/linux/device.h | 2 +- include/linux/dma-map-ops.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 1c5280d28bc3..b4bde8d22697 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -750,7 +750,7 @@ struct device { struct dev_pin_info *pins; #endif struct dev_msi_info msi; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 077b15c93bb8..9668ddf3696e 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -75,7 +75,7 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS #include static inline const struct dma_map_ops *get_dma_ops(struct device *dev) @@ -90,7 +90,7 @@ static inline void set_dma_ops(struct device *dev, { dev->dma_ops = dma_ops; } -#else /* CONFIG_DMA_OPS */ +#else /* CONFIG_ARCH_HAS_DMA_OPS */ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return NULL; @@ -99,7 +99,7 @@ static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { } -#endif /* CONFIG_DMA_OPS */ +#endif /* CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA extern struct cma *dma_contiguous_default_area; -- cgit v1.2.3 From 73ed0baae66df50359c876f65f41179d6ebd2716 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 30 Jul 2024 23:49:13 -0700 Subject: mm: swap: swap cluster switch to double link list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: swap: mTHP swap allocator base on swap cluster order", v5. This is the short term solutions "swap cluster order" listed in my "Swap Abstraction" discussion slice 8 in the recent LSF/MM conference. When commit 845982eb264bc "mm: swap: allow storage of all mTHP orders" is introduced, it only allocates the mTHP swap entries from the new empty cluster list.  It has a fragmentation issue reported by Barry. https://lore.kernel.org/all/CAGsJ_4zAcJkuW016Cfi6wicRr8N9X+GJJhgMQdSMp+Ah+NSgNQ@mail.gmail.com/ The reason is that all the empty clusters have been exhausted while there are plenty of free swap entries in the cluster that are not 100% free. Remember the swap allocation order in the cluster. 
Keep track of the per order non full cluster list for later allocation. This series gives the swap SSD allocation a new separate code path from the HDD allocation. The new allocator use cluster list only and do not global scan swap_map[] without lock any more. This streamline the swap allocation for SSD. The code matches the execution flow much better. User impact: For users that allocate and free mix order mTHP swapping, It greatly improves the success rate of the mTHP swap allocation after the initial phase. It also performs faster when the swapfile is close to full, because the allocator can get the non full cluster from a list rather than scanning a lot of swap_map entries.  With Barry's mthp test program V2: Without: $ ./thp_swap_allocator_test -a Iteration 1: swpout inc: 32, swpout fallback inc: 192, Fallback percentage: 85.71% Iteration 2: swpout inc: 0, swpout fallback inc: 231, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 227, Fallback percentage: 100.00% ... Iteration 98: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 215, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% $ ./thp_swap_allocator_test -a -s Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% $ ./thp_swap_allocator_test -s Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% $ ./thp_swap_allocator_test Iteration 1: swpout inc: 0, swpout fallback inc: 224, Fallback percentage: 100.00% Iteration 2: swpout inc: 0, swpout fallback inc: 218, Fallback percentage: 100.00% Iteration 3: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% .. 
Iteration 98: swpout inc: 0, swpout fallback inc: 228, Fallback percentage: 100.00% Iteration 99: swpout inc: 0, swpout fallback inc: 230, Fallback percentage: 100.00% Iteration 100: swpout inc: 0, swpout fallback inc: 229, Fallback percentage: 100.00% With: # with all 0.00% filter out $ ./thp_swap_allocator_test -a | grep -v "0.00%" $ # all result are 0.00% $ ./thp_swap_allocator_test -a -s | grep -v "0.00%" ./thp_swap_allocator_test -a -s | grep -v "0.00%" Iteration 14: swpout inc: 223, swpout fallback inc: 3, Fallback percentage: 1.33% Iteration 19: swpout inc: 219, swpout fallback inc: 7, Fallback percentage: 3.10% Iteration 28: swpout inc: 225, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 29: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 34: swpout inc: 220, swpout fallback inc: 8, Fallback percentage: 3.51% Iteration 35: swpout inc: 222, swpout fallback inc: 11, Fallback percentage: 4.72% Iteration 38: swpout inc: 217, swpout fallback inc: 4, Fallback percentage: 1.81% Iteration 40: swpout inc: 222, swpout fallback inc: 6, Fallback percentage: 2.63% Iteration 42: swpout inc: 221, swpout fallback inc: 2, Fallback percentage: 0.90% Iteration 43: swpout inc: 215, swpout fallback inc: 7, Fallback percentage: 3.15% Iteration 47: swpout inc: 226, swpout fallback inc: 2, Fallback percentage: 0.88% Iteration 49: swpout inc: 217, swpout fallback inc: 1, Fallback percentage: 0.46% Iteration 52: swpout inc: 221, swpout fallback inc: 8, Fallback percentage: 3.49% Iteration 56: swpout inc: 224, swpout fallback inc: 4, Fallback percentage: 1.75% Iteration 58: swpout inc: 214, swpout fallback inc: 5, Fallback percentage: 2.28% Iteration 62: swpout inc: 220, swpout fallback inc: 3, Fallback percentage: 1.35% Iteration 64: swpout inc: 224, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 67: swpout inc: 221, swpout fallback inc: 1, Fallback percentage: 0.45% Iteration 75: swpout inc: 220, swpout fallback inc: 9, Fallback percentage: 3.93% Iteration 82: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 86: swpout inc: 211, swpout fallback inc: 12, Fallback percentage: 5.38% Iteration 89: swpout inc: 226, swpout fallback inc: 2, Fallback percentage: 0.88% Iteration 93: swpout inc: 220, swpout fallback inc: 1, Fallback percentage: 0.45% Iteration 94: swpout inc: 224, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 96: swpout inc: 221, swpout fallback inc: 6, Fallback percentage: 2.64% Iteration 98: swpout inc: 227, swpout fallback inc: 1, Fallback percentage: 0.44% Iteration 99: swpout inc: 227, swpout fallback inc: 3, Fallback percentage: 1.30% $ ./thp_swap_allocator_test ./thp_swap_allocator_test Iteration 1: swpout inc: 233, swpout fallback inc: 0, Fallback percentage: 0.00% Iteration 2: swpout inc: 131, swpout fallback inc: 101, Fallback percentage: 43.53% Iteration 3: swpout inc: 71, swpout fallback inc: 155, Fallback percentage: 68.58% Iteration 4: swpout inc: 55, swpout fallback inc: 168, Fallback percentage: 75.34% Iteration 5: swpout inc: 35, swpout fallback inc: 191, Fallback percentage: 84.51% Iteration 6: swpout inc: 25, swpout fallback inc: 199, Fallback percentage: 88.84% Iteration 7: swpout inc: 23, swpout fallback inc: 205, Fallback percentage: 89.91% Iteration 8: swpout inc: 9, swpout fallback inc: 219, Fallback percentage: 96.05% Iteration 9: swpout inc: 13, swpout fallback inc: 213, Fallback percentage: 94.25% Iteration 10: swpout inc: 12, swpout fallback inc: 216, Fallback percentage: 
94.74% Iteration 11: swpout inc: 16, swpout fallback inc: 213, Fallback percentage: 93.01% Iteration 12: swpout inc: 10, swpout fallback inc: 210, Fallback percentage: 95.45% Iteration 13: swpout inc: 16, swpout fallback inc: 212, Fallback percentage: 92.98% Iteration 14: swpout inc: 12, swpout fallback inc: 212, Fallback percentage: 94.64% Iteration 15: swpout inc: 15, swpout fallback inc: 211, Fallback percentage: 93.36% Iteration 16: swpout inc: 15, swpout fallback inc: 200, Fallback percentage: 93.02% Iteration 17: swpout inc: 9, swpout fallback inc: 220, Fallback percentage: 96.07% $ ./thp_swap_allocator_test -s ./thp_swap_allocator_test -s Iteration 1: swpout inc: 233, swpout fallback inc: 0, Fallback percentage: 0.00% Iteration 2: swpout inc: 97, swpout fallback inc: 135, Fallback percentage: 58.19% Iteration 3: swpout inc: 42, swpout fallback inc: 192, Fallback percentage: 82.05% Iteration 4: swpout inc: 19, swpout fallback inc: 214, Fallback percentage: 91.85% Iteration 5: swpout inc: 12, swpout fallback inc: 213, Fallback percentage: 94.67% Iteration 6: swpout inc: 11, swpout fallback inc: 217, Fallback percentage: 95.18% Iteration 7: swpout inc: 9, swpout fallback inc: 214, Fallback percentage: 95.96% Iteration 8: swpout inc: 8, swpout fallback inc: 213, Fallback percentage: 96.38% Iteration 9: swpout inc: 2, swpout fallback inc: 223, Fallback percentage: 99.11% Iteration 10: swpout inc: 2, swpout fallback inc: 228, Fallback percentage: 99.13% Iteration 11: swpout inc: 4, swpout fallback inc: 214, Fallback percentage: 98.17% Iteration 12: swpout inc: 5, swpout fallback inc: 226, Fallback percentage: 97.84% Iteration 13: swpout inc: 3, swpout fallback inc: 212, Fallback percentage: 98.60% Iteration 14: swpout inc: 0, swpout fallback inc: 222, Fallback percentage: 100.00% Iteration 15: swpout inc: 3, swpout fallback inc: 222, Fallback percentage: 98.67% Iteration 16: swpout inc: 4, swpout fallback inc: 223, Fallback percentage: 98.24% ========= Kernel compile under tmpfs with cgroup memory.max = 470M. 12 core 24 hyperthreading, 32 jobs. 10 Run each group SSD swap 10 runs average, 20G swap partition: With: user 2929.064 system 1479.381 : 1376.89 1398.22 1444.64 1477.39 1479.04 1497.27 1504.47 1531.4 1532.92 1551.57 real 1441.324 Without: user 2910.872 system 1482.732 : 1440.01 1451.4 1462.01 1467.47 1467.51 1469.3 1470.19 1496.32 1544.1 1559.01 real 1580.822 Two zram swap: zram0 3.0G zram1 20G. The idea is forcing the zram0 almost full then overflow to zram1: With: user 4320.301 system 4272.403 : 4236.24 4262.81 4264.75 4269.13 4269.44 4273.06 4279.85 4285.98 4289.64 4293.13 real 431.759 Without user 4301.393 system 4387.672 : 4374.47 4378.3 4380.95 4382.84 4383.06 4388.05 4389.76 4397.16 4398.23 4403.9 real 433.979 ------ more test result from Kaiui ---------- Test with build linux kernel using a 4G ZRAM, 1G memory.max limit on top of shmem: System info: 32 Core AMD Zen2, 64G total memory. 
Test 3 times using only 4K pages: ================================= With: ----- 1838.74user 2411.21system 2:37.86elapsed 2692%CPU (0avgtext+0avgdata 847060maxresident)k 1839.86user 2465.77system 2:39.35elapsed 2701%CPU (0avgtext+0avgdata 847060maxresident)k 1840.26user 2454.68system 2:39.43elapsed 2693%CPU (0avgtext+0avgdata 847060maxresident)k Summary (~4.6% improment of system time): User: 1839.62 System: 2443.89: 2465.77 2454.68 2411.21 Real: 158.88 Without: -------- 1837.99user 2575.95system 2:43.09elapsed 2706%CPU (0avgtext+0avgdata 846520maxresident)k 1838.32user 2555.15system 2:42.52elapsed 2709%CPU (0avgtext+0avgdata 846520maxresident)k 1843.02user 2561.55system 2:43.35elapsed 2702%CPU (0avgtext+0avgdata 846520maxresident)k Summary: User: 1839.78 System: 2564.22: 2575.95 2555.15 2561.55 Real: 162.99 Test 5 times using enabled all mTHP pages: ========================================== With: ----- 1796.44user 2937.33system 2:59.09elapsed 2643%CPU (0avgtext+0avgdata 846936maxresident)k 1802.55user 3002.32system 2:54.68elapsed 2750%CPU (0avgtext+0avgdata 847072maxresident)k 1806.59user 2986.53system 2:55.17elapsed 2736%CPU (0avgtext+0avgdata 847092maxresident)k 1803.27user 2982.40system 2:54.49elapsed 2742%CPU (0avgtext+0avgdata 846796maxresident)k 1807.43user 3036.08system 2:56.06elapsed 2751%CPU (0avgtext+0avgdata 846488maxresident)k Summary (~8.4% improvement of system time): User: 1803.25 System: 2988.93: 2937.33 3002.32 2986.53 2982.40 3036.08 Real: 175.90 mTHP swapout status: /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpout:347721 /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpout_fallback:3110 /sys/kernel/mm/transparent_hugepage/hugepages-512kB/stats/swpout:3365 /sys/kernel/mm/transparent_hugepage/hugepages-512kB/stats/swpout_fallback:8269 /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats/swpout:24 /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats/swpout_fallback:3341 /sys/kernel/mm/transparent_hugepage/hugepages-1024kB/stats/swpout:145 /sys/kernel/mm/transparent_hugepage/hugepages-1024kB/stats/swpout_fallback:5038 /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout:322737 /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback:36808 /sys/kernel/mm/transparent_hugepage/hugepages-16kB/stats/swpout:380455 /sys/kernel/mm/transparent_hugepage/hugepages-16kB/stats/swpout_fallback:1010 /sys/kernel/mm/transparent_hugepage/hugepages-256kB/stats/swpout:24973 /sys/kernel/mm/transparent_hugepage/hugepages-256kB/stats/swpout_fallback:13223 /sys/kernel/mm/transparent_hugepage/hugepages-128kB/stats/swpout:197348 /sys/kernel/mm/transparent_hugepage/hugepages-128kB/stats/swpout_fallback:80541 Without: -------- 1794.41user 3151.29system 3:05.97elapsed 2659%CPU (0avgtext+0avgdata 846704maxresident)k 1810.27user 3304.48system 3:05.38elapsed 2759%CPU (0avgtext+0avgdata 846636maxresident)k 1809.84user 3254.85system 3:03.83elapsed 2755%CPU (0avgtext+0avgdata 846952maxresident)k 1813.54user 3259.56system 3:04.28elapsed 2752%CPU (0avgtext+0avgdata 846848maxresident)k 1829.97user 3338.40system 3:07.32elapsed 2759%CPU (0avgtext+0avgdata 847024maxresident)k Summary: User: 1811.61 System: 3261.72 : 3151.29 3304.48 3254.85 3259.56 3338.40 Real: 185.356 mTHP swapout status: hugepages-32kB/stats/swpout:35630 hugepages-32kB/stats/swpout_fallback:1809908 hugepages-512kB/stats/swpout:523 hugepages-512kB/stats/swpout_fallback:55235 hugepages-2048kB/stats/swpout:53 hugepages-2048kB/stats/swpout_fallback:17264 
hugepages-1024kB/stats/swpout:85 hugepages-1024kB/stats/swpout_fallback:24979 hugepages-64kB/stats/swpout:30117 hugepages-64kB/stats/swpout_fallback:1825399 hugepages-16kB/stats/swpout:42775 hugepages-16kB/stats/swpout_fallback:1951123 hugepages-256kB/stats/swpout:2326 hugepages-256kB/stats/swpout_fallback:170165 hugepages-128kB/stats/swpout:17925 hugepages-128kB/stats/swpout_fallback:1309757

This patch (of 9): Previously, the swap cluster used a cluster index as a pointer to construct a custom singly linked list type "swap_cluster_list". The next cluster pointer is shared with the cluster->count, which prevents putting a non-free cluster into a list. Change the cluster to use the standard doubly linked list instead. This allows tracking the nonfull clusters in a follow-up patch, making it faster to get to a nonfull cluster of a given order. Remove the cluster getter/setter helpers for accessing the cluster struct members. The list operations are protected by swap_info_struct->lock.

Change the cluster code to use "struct swap_cluster_info *" to reference a cluster rather than an index. That is more consistent with the list manipulation and avoids repeatedly adding the index to the cluster_info. The code is easier to understand. Remove the flag that marks the cluster's next pointer as NULL; the doubly linked list handles the empty list just fine.

The "swap_cluster_info" struct grows by two pointers, but because 512 swap entries share one swap_cluster_info struct, this has very little impact on the average memory usage per swap entry. For a 1TB swapfile, the swap cluster data structure increases from 8MB to 24MB. Other than the list conversion, there is no real functional change in this patch.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-0-cb9c148b9297@kernel.org Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-1-cb9c148b9297@kernel.org Signed-off-by: Chris Li Reported-by: Barry Song <21cnbao@gmail.com> Reviewed-by: "Huang, Ying" Cc: Hugh Dickins Cc: Kairui Song Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5b920fa2315b..ead568f04f81 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -243,22 +243,20 @@ enum { * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * - * The data field stores next cluster if the cluster is free or cluster usage - * counter otherwise. The flags field determines if a cluster is free. This is - * protected by swap_info_struct.lock. + * The flags field determines if a cluster is free. This is + * protected by cluster lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields - * and swap_info_struct->swap_map - * elements correspond to the swap - * cluster + * other than list, and swap_info_struct->swap_map + * elements corresponding to the swap cluster.
*/ - unsigned int data:24; - unsigned int flags:8; + u16 count; + u8 flags; + struct list_head list; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ /* * The first page in the swap file is the swap header, which is always marked @@ -283,11 +281,6 @@ struct percpu_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; -struct swap_cluster_list { - struct swap_cluster_info head; - struct swap_cluster_info tail; -}; - /* * The in-memory structure used to track swap areas. */ @@ -300,7 +293,7 @@ struct swap_info_struct { unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ - struct swap_cluster_list free_clusters; /* free clusters list */ + struct list_head free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ @@ -331,7 +324,7 @@ struct swap_info_struct { * list. */ struct work_struct discard_work; /* discard worker */ - struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. -- cgit v1.2.3

From d07a46a4ac18786e7f4c98fb08525ed80dd1f642 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 30 Jul 2024 23:49:14 -0700 Subject: mm: swap: mTHP allocate swap entries from nonfull list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Track the nonfull clusters as well as the empty clusters on lists. Each order has one nonfull cluster list. A cluster remembers which order it was used for when it was allocated as a new cluster. When a cluster has a free entry, it is added to the nonfull[order] list. When the free cluster list is empty, also allocate from the nonfull list of that order. This improves the mTHP swap allocation success rate.

There are limitations if the distribution of mTHP orders changes a lot over time, e.g. a lot of nonfull clusters get assigned to order A, and later there are many order B allocations but very little allocation in order A. Currently the clusters used by order A will not be reused by order B unless they become 100% empty.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-2-cb9c148b9297@kernel.org Signed-off-by: Chris Li Reported-by: Barry Song <21cnbao@gmail.com> Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kairui Song Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index ead568f04f81..7007a3c2336c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -254,9 +254,11 @@ struct swap_cluster_info { */ u16 count; u8 flags; + u8 order; struct list_head list; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ +#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ /* * The first page in the swap file is the swap header, which is always marked @@ -294,6 +296,8 @@ struct swap_info_struct { unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info.
Only for SSD */ struct list_head free_clusters; /* free clusters list */ + struct list_head nonfull_clusters[SWAP_NR_ORDERS]; + /* list of cluster that contains at least one free slot */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 477cb7ba28892eda112c79d8f75d10edabfc3050 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:19 -0700 Subject: mm: swap: add a fragment cluster list

Now that the swap cluster allocator arranges the clusters in LRU style, the "cold" clusters staying at the head of the nonfull lists are the ones that were used for allocation a long time ago and are still partially occupied. So if the allocator can't find enough contiguous slots on them to satisfy a high-order allocation, it's unlikely that slots will be freed on them to satisfy the allocation, at least not within a short period. As a result, nonfull cluster scanning will waste time repeatedly scanning the unusable head of the list.

Also, multiple CPUs could contend on the same head cluster of a nonfull list. Unlike free clusters, which are removed from the list when a CPU starts using them, a nonfull cluster stays at the head.

So introduce a new list, the frag list: all scanned nonfull clusters will be moved to this list, both to avoid repeated scanning and to avoid contention. The frag list is still used as a fallback for allocations, so if one CPU fails to allocate slots of one order, it can still steal other CPUs' clusters. And order 0 will favor the fragmented clusters to better protect the nonfull clusters.

If any slots on a fragment cluster are freed, move the cluster back to the nonfull list, indicating that it is worth another scan. Compared to scanning upon freeing a slot, this keeps the scanning lazy and saves some CPU if there are still other clusters to use.

It may seem unnecessary to keep the fragmented clusters on a list at all if they can't be used for an allocation of a specific order, but this will start to make sense once reclaim during scanning is ready.
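As a purely illustrative aside (not part of the patch; the helper names and the assumption that si->lock is held are mine), the list movement described above boils down to something like:

        /* Sketch: move a scanned nonfull cluster to the per-order frag list
         * so subsequent scans and other CPUs skip it.
         */
        static void cluster_move_to_frag(struct swap_info_struct *si,
                                         struct swap_cluster_info *ci)
        {
                lockdep_assert_held(&si->lock);
                ci->flags &= ~CLUSTER_FLAG_NONFULL;
                ci->flags |= CLUSTER_FLAG_FRAG;
                list_move_tail(&ci->list, &si->frag_clusters[ci->order]);
        }

        /* Sketch: a slot on a fragment cluster was freed, so give the cluster
         * another chance on the nonfull list.
         */
        static void cluster_move_to_nonfull(struct swap_info_struct *si,
                                            struct swap_cluster_info *ci)
        {
                lockdep_assert_held(&si->lock);
                ci->flags &= ~CLUSTER_FLAG_FRAG;
                ci->flags |= CLUSTER_FLAG_NONFULL;
                list_move_tail(&ci->list, &si->nonfull_clusters[ci->order]);
        }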
Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-7-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7007a3c2336c..c526cd3e7b31 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -259,6 +259,7 @@ struct swap_cluster_info { }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ /* * The first page in the swap file is the swap header, which is always marked @@ -298,6 +299,8 @@ struct swap_info_struct { struct list_head free_clusters; /* free clusters list */ struct list_head nonfull_clusters[SWAP_NR_ORDERS]; /* list of cluster that contains at least one free slot */ + struct list_head frag_clusters[SWAP_NR_ORDERS]; + /* list of cluster that are fragmented or contented */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 661383c6111a38c88df61af6bfbcfacd2ff20a67 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:20 -0700 Subject: mm: swap: relaim the cached parts that got scanned

This commit implements reclaim during scan for the cluster allocator. Cluster scanning was unable to reuse SWAP_HAS_CACHE slots, which could result in a low allocation success rate or early OOM. So to ensure the maximum allocation success rate, integrate reclaiming with scanning: if a suitable range of swap slots is found but it is fragmented due to HAS_CACHE, just try to reclaim those slots.

Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-8-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index c526cd3e7b31..e372122e4f45 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,6 +301,7 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ + unsigned int frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ -- cgit v1.2.3

From 2cacbdfdee65b18f9952620e762eab043d71b564 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 30 Jul 2024 23:49:21 -0700 Subject: mm: swap: add a adaptive full cluster cache reclaim

Link all full clusters with one full list, and reclaim from it when the allocator has run out of all usable clusters. There are many reasons a folio can end up in the swap cache while having no swap count reference, so the best way to search for such slots is still by iterating over the swap clusters. With the list as an LRU, iterating from the oldest cluster and keeping the clusters rotating is a very doable and clean way to free up potentially unused clusters.
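(Purely as a sketch of that rotation idea, with a hypothetical helper name and the actual reclaim of cached-only slots elided:)

        /* Sketch: rotate the oldest full cluster to the list tail and report
         * whether there was a candidate to try reclaiming from.
         */
        static bool rotate_one_full_cluster(struct swap_info_struct *si)
        {
                struct swap_cluster_info *ci;

                lockdep_assert_held(&si->lock);
                if (list_empty(&si->full_clusters))
                        return false;

                ci = list_first_entry(&si->full_clusters,
                                      struct swap_cluster_info, list);
                list_move_tail(&ci->list, &si->full_clusters);
                /* reclaim of SWAP_HAS_CACHE-only slots on ci would go here */
                return true;
        }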
When any allocation fails, try to reclaim and rotate only one cluster. This is adaptive for high-order allocations, since they can tolerate fallback. It avoids latency and gives the full cluster list a fair chance to get reclaimed, relieving the pressure for the fallback order-0 allocations or follow-up high-order allocations. If the swap device is getting very full, reclaim more aggressively to ensure no OOM will happen. This ensures an order-0 heavy workload won't go OOM, as order-0 allocations won't fail if any cluster still has space.

[ryncsn@gmail.com: fix discard of full cluster] Link: https://lkml.kernel.org/r/CAMgjq7CWwK75_2Zi5P40K08pk9iqOcuWKL6khu=x4Yg_nXaQag@mail.gmail.com Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-9-cb9c148b9297@kernel.org Signed-off-by: Kairui Song Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kalesh Singh Cc: Ryan Roberts Cc: David Hildenbrand Cc: Kairui Song Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index e372122e4f45..1c8f844a9f0f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -260,6 +260,7 @@ struct swap_cluster_info { #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ #define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */ /* * The first page in the swap file is the swap header, which is always marked @@ -297,6 +298,7 @@ struct swap_info_struct { unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ + struct list_head full_clusters; /* full clusters list */ struct list_head nonfull_clusters[SWAP_NR_ORDERS]; /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; -- cgit v1.2.3

From 46bcce503197d1019ee5c49ccde978e31298e35f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:51 +0300 Subject: arch, mm: move definition of node_data to generic code

Every architecture that supports NUMA defines node_data in the same way: struct pglist_data *node_data[MAX_NUMNODES]; There is no reason to keep multiple copies of this definition and its forward declarations, especially when such a forward declaration is the only thing in include/asm/mmzone.h for many architectures. Add the definition and declaration of node_data to generic code and drop the architecture-specific versions.

Link: https://lkml.kernel.org/r/20240807064110.1003856-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Acked-by: Davidlohr Bueso Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J.
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/mmzone.h | 5 +++++ include/linux/numa.h | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 include/asm-generic/mmzone.h (limited to 'include') diff --git a/include/asm-generic/mmzone.h b/include/asm-generic/mmzone.h new file mode 100644 index 000000000000..2ab5193e8394 --- /dev/null +++ b/include/asm-generic/mmzone.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_MMZONE_H +#define _ASM_GENERIC_MMZONE_H + +#endif diff --git a/include/linux/numa.h b/include/linux/numa.h index eb19503604fe..e5841d4057ab 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -30,6 +30,9 @@ static inline bool numa_valid_node(int nid) #ifdef CONFIG_NUMA #include +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); -- cgit v1.2.3 From ec164cf1dd3df4ae58931b9b491b06a90d5c22d3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:52 +0300 Subject: mm: drop CONFIG_HAVE_ARCH_NODEDATA_EXTENSION There are no users of HAVE_ARCH_NODEDATA_EXTENSION left, so arch_alloc_nodedata() and arch_refresh_nodedata() are not needed anymore. Replace the call to arch_alloc_nodedata() in free_area_init() with a new helper alloc_offline_node_data(), remove arch_refresh_nodedata() and cleanup include/linux/memory_hotplug.h from the associated ifdefery. Link: https://lkml.kernel.org/r/20240807064110.1003856-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/memory_hotplug.h | 48 ------------------------------------------ include/linux/numa.h | 4 ++++ 2 files changed, 4 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ebe876930e78..b27ddce5d324 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,54 +16,6 @@ struct resource; struct vmem_altmap; struct dev_pagemap; -#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION -/* - * For supporting node-hotadd, we have to allocate a new pgdat. - * - * If an arch has generic style NODE_DATA(), - * node_data[nid] = kzalloc() works well. But it depends on the architecture. - * - * In general, generic_alloc_nodedata() is used. - * - */ -extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); - -#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - -#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) - -#ifdef CONFIG_NUMA -/* - * XXX: node aware allocation can't work well to get new node's memory at this time. - * Because, pgdat for the new node is not allocated/initialized yet itself. 
- * To use new node's memory, more consideration will be necessary. - */ -#define generic_alloc_nodedata(nid) \ -({ \ - memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ -}) - -extern pg_data_t *node_data[]; -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ - node_data[nid] = pgdat; -} - -#else /* !CONFIG_NUMA */ - -/* never called */ -static inline pg_data_t *generic_alloc_nodedata(int nid) -{ - BUG(); - return NULL; -} -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); diff --git a/include/linux/numa.h b/include/linux/numa.h index e5841d4057ab..b41b1569781b 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -33,6 +33,8 @@ static inline bool numa_valid_node(int nid) extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +void __init alloc_offline_node_data(int nid); + /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); @@ -60,6 +62,8 @@ static inline int phys_to_target_node(u64 start) { return 0; } + +static inline void alloc_offline_node_data(int nid) {} #endif #define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) -- cgit v1.2.3 From 3515863d9f29dd476cdad875fbd558fc85b4c511 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:40:53 +0300 Subject: arch, mm: pull out allocation of NODE_DATA to generic code Architectures that support NUMA duplicate the code that allocates NODE_DATA on the node-local memory with slight variations in reporting of the addresses where the memory was allocated. Use x86 version as the basis for the generic alloc_node_data() function and call this function in architecture specific numa initialization. Round up node data size to SMP_CACHE_BYTES rather than to PAGE_SIZE like x86 used to do since the bootmem era when allocation granularity was PAGE_SIZE anyway. Link: https://lkml.kernel.org/r/20240807064110.1003856-10-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/numa.h b/include/linux/numa.h index b41b1569781b..3567e40329eb 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -33,6 +33,7 @@ static inline bool numa_valid_node(int nid) extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +void __init alloc_node_data(int nid); void __init alloc_offline_node_data(int nid); /* Generic implementation available */ -- cgit v1.2.3 From 87482708210ff3333adab4dc5f1a491793159d43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:01 +0300 Subject: mm: introduce numa_memblks Move code dealing with numa_memblks from arch/x86 to mm/ and add Kconfig options to let x86 select it in its Kconfig. This code will be later reused by arch_numa. No functional changes. Link: https://lkml.kernel.org/r/20240807064110.1003856-18-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/linux/numa_memblks.h (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h new file mode 100644 index 000000000000..6981cf97d2c9 --- /dev/null +++ b/include/linux/numa_memblks.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NUMA_MEMBLKS_H +#define __NUMA_MEMBLKS_H + +#ifdef CONFIG_NUMA_MEMBLKS +#include + +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; + +extern struct numa_meminfo numa_meminfo __initdata_or_meminfo; +extern struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo; + +int __init numa_add_memblk(int nodeid, u64 start, u64 end); +void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); + +int __init numa_cleanup_meminfo(struct numa_meminfo *mi); +int __init numa_register_meminfo(struct numa_meminfo *mi); + +void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, + const struct numa_meminfo *mi); + +#endif /* CONFIG_NUMA_MEMBLKS */ + +#endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3 From 75f9d4cc4eb5e15dd5ce843e33de7f6346a0d4fa Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:02 +0300 Subject: mm: move numa_distance and related code from x86 to numa_memblks Move code dealing with numa_distance array from arch/x86 to mm/numa_memblks.c This code will be later reused by arch_numa. No functional changes. 
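For background only, a node-distance lookup over such a table is conceptually just an array access with generic defaults for unknown nodes; a rough sketch (the numa_distance array is assumed here, and this is not the verbatim moved code):

        /* Sketch: distance between two nodes, falling back to the generic
         * LOCAL_DISTANCE/REMOTE_DISTANCE defaults when the table does not
         * cover these nodes.
         */
        int __node_distance(int from, int to)
        {
                if (from >= numa_distance_cnt || to >= numa_distance_cnt)
                        return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
                return numa_distance[from * numa_distance_cnt + to];
        }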
Link: https://lkml.kernel.org/r/20240807064110.1003856-19-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 6981cf97d2c9..968a590535ac 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -7,6 +7,10 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) +extern int numa_distance_cnt; +void __init numa_set_distance(int from, int to, int distance); +void __init numa_reset_distance(void); + struct numa_memblk { u64 start; u64 end; -- cgit v1.2.3 From b0c4e27c687177aa36048110a12cba94bf291452 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:03 +0300 Subject: mm: introduce numa_emulation Move numa_emulation code from arch/x86 to mm/numa_emulation.c This code will be later reused by arch_numa. No functional changes. Link: https://lkml.kernel.org/r/20240807064110.1003856-20-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 968a590535ac..f81f98678074 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -34,6 +34,23 @@ int __init numa_register_meminfo(struct numa_meminfo *mi); void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, const struct numa_meminfo *mi); +#ifdef CONFIG_NUMA_EMU +int numa_emu_cmdline(char *str); +void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, + unsigned int nr_emu_nids); +u64 __init numa_emu_dma_end(void); +void __init numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt); +#else +static inline void numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt) +{ } +static inline int numa_emu_cmdline(char *str) +{ + return -EINVAL; +} +#endif /* CONFIG_NUMA_EMU */ + #endif /* CONFIG_NUMA_MEMBLKS */ #endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3 From 692d73d2f0f75e58828ab05872b3789ae1f486ef Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:04 +0300 Subject: mm: numa_memblks: introduce numa_memblks_init Move most of x86::numa_init() to numa_memblks so that the latter will be more self-contained. With this numa_memblk data structures should not be exposed to the architecture specific code. Link: https://lkml.kernel.org/r/20240807064110.1003856-21-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index f81f98678074..07381320848f 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -34,6 +34,9 @@ int __init numa_register_meminfo(struct numa_meminfo *mi); void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, const struct numa_meminfo *mi); +int __init numa_memblks_init(int (*init_func)(void), + bool memblock_force_top_down); + #ifdef CONFIG_NUMA_EMU int numa_emu_cmdline(char *str); void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, -- cgit v1.2.3 From 317ef4598bdc48c0196adc02ed4edaf82af279f2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:05 +0300 Subject: mm: numa_memblks: make several functions and variables static Make functions and variables that are exclusively used by numa_memblks static. Move numa_nodemask_from_meminfo() before its callers to avoid forward declaration. 
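For context, what that helper conceptually does, shown as a simplified sketch rather than the verbatim kernel code:

        /* Sketch: collect every node that owns at least one memblk. */
        static void numa_nodemask_from_meminfo(nodemask_t *nodemask,
                                               const struct numa_meminfo *mi)
        {
                int i;

                for (i = 0; i < mi->nr_blks; i++)
                        if (numa_valid_node(mi->blk[i].nid))
                                node_set(mi->blk[i].nid, *nodemask);
        }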
Link: https://lkml.kernel.org/r/20240807064110.1003856-22-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 07381320848f..5c6e12ad0b7a 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -7,7 +7,6 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) -extern int numa_distance_cnt; void __init numa_set_distance(int from, int to, int distance); void __init numa_reset_distance(void); @@ -22,17 +21,10 @@ struct numa_meminfo { struct numa_memblk blk[NR_NODE_MEMBLKS]; }; -extern struct numa_meminfo numa_meminfo __initdata_or_meminfo; -extern struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo; - int __init numa_add_memblk(int nodeid, u64 start, u64 end); void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); int __init numa_cleanup_meminfo(struct numa_meminfo *mi); -int __init numa_register_meminfo(struct numa_meminfo *mi); - -void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, - const struct numa_meminfo *mi); int __init numa_memblks_init(int (*init_func)(void), bool memblock_force_top_down); -- cgit v1.2.3 From 767507654c22578ea0b51d181211b2e7714ea7cd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:08 +0300 Subject: arch_numa: switch over to numa_memblks Until now arch_numa was directly translating firmware NUMA information to memblock. Using numa_memblks as an intermediate step has a few advantages: * alignment with more battle tested x86 implementation * availability of NUMA emulation * maintaining node information for not yet populated memory Adjust a few places in numa_memblks to compile with 32-bit phys_addr_t and replace current functionality related to numa_add_memblk() and __node_distance() in arch_numa with the implementation based on numa_memblks and add functions required by numa_emulation. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/ZrO6cExVz1He_yPn@kernel.org [rppt@kernel.org: PFN_PHYS() translation is unnecessary here] Link: https://lkml.kernel.org/r/Zs2T5wkSYO9MGcab@kernel.org Link: https://lkml.kernel.org/r/20240807064110.1003856-25-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Zi Yan # for x86_64 and arm64 Reviewed-by: Jonathan Cameron Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Acked-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. 
Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/numa.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index c32e0cf23c90..e063d6487f66 100644 --- a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -32,10 +32,8 @@ static inline const struct cpumask *cpumask_of_node(int node) void __init arch_numa_init(void); int __init numa_add_memblk(int nodeid, u64 start, u64 end); -void __init numa_set_distance(int from, int to, int distance); -void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); -int __init early_cpu_to_node(int cpu); +int early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -51,4 +49,8 @@ static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ +#ifdef CONFIG_NUMA_EMU +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable); +#endif + #endif /* __ASM_GENERIC_NUMA_H */ -- cgit v1.2.3 From 1b5695b02444660297cc54cab44f123ff28de2cc Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 7 Aug 2024 09:41:09 +0300 Subject: mm: make range-to-target_node lookup facility a part of numa_memblks The x86 implementation of range-to-target_node lookup (i.e. phys_to_target_node() and memory_add_physaddr_to_nid()) relies on numa_memblks. Since numa_memblks are now part of the generic code, move these functions from x86 to mm/numa_memblks.c and select CONFIG_NUMA_KEEP_MEMINFO when CONFIG_NUMA_MEMBLKS=y for dax and cxl. [rppt@kernel.org: fix build] Link: https://lkml.kernel.org/r/ZtVfSt_zloPdDqVB@kernel.org Link: https://lkml.kernel.org/r/20240807064110.1003856-26-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Jonathan Cameron Tested-by: Zi Yan # for x86_64 and arm64 Tested-by: Jonathan Cameron [arm64 + CXL via QEMU] Reviewed-by: Dan Williams Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Rafael J. 
Wysocki Cc: Rob Herring (Arm) Cc: Samuel Holland Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index 5c6e12ad0b7a..cfad6ce7e1bd 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -46,6 +46,13 @@ static inline int numa_emu_cmdline(char *str) } #endif /* CONFIG_NUMA_EMU */ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(u64 start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif /* CONFIG_NUMA_KEEP_MEMINFO */ + #endif /* CONFIG_NUMA_MEMBLKS */ #endif /* __NUMA_MEMBLKS_H */ -- cgit v1.2.3

From 650180760be6bb448609d4d155eef3b728ace641 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 Aug 2024 15:42:02 +0800 Subject: mm: swap: extend swap_shmem_alloc() to support batch SWAP_MAP_SHMEM flag setting

Patch series "support large folio swap-out and swap-in for shmem", v5.

Shmem will support large folio allocation [1] [2] to get better performance; however, memory reclaim still splits the precious large folios when trying to swap out shmem, which may lead to memory fragmentation and means shmem cannot take advantage of large folios. Moreover, the swap code already supports swapping out large folios without splitting them, and the large folio swap-in [3] series is queued into the mm-unstable branch. Hence this patch set also supports large folio swap-out and swap-in for shmem.

This patch (of 9): To support shmem large folio swap operations, add a new parameter to swap_shmem_alloc() that allows batch SWAP_MAP_SHMEM flag setting for shmem swap entries. While we are at it, use folio_nr_pages() to get the number of pages of the folio as a preparation.
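As an illustrative sketch of the caller-side effect (not the actual shmem hunk; the local variables are assumptions):

        /* Sketch: set SWAP_MAP_SHMEM for the whole folio in one batched call
         * instead of once per page.
         */
        swp_entry_t entry = folio->swap;
        int nr_pages = folio_nr_pages(folio);

        swap_shmem_alloc(entry, nr_pages);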
Link: https://lkml.kernel.org/r/cover.1723434324.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/99f64115d04b285e009580eb177352c57119ffd0.1723434324.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Barry Song Cc: Chris Li Cc: Daniel Gomez Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Lance Yang Cc: Matthew Wilcox Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 1c8f844a9f0f..248db1dd7812 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -481,7 +481,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); -extern void swap_shmem_alloc(swp_entry_t); +extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); @@ -548,7 +548,7 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) return 0; } -static inline void swap_shmem_alloc(swp_entry_t swp) +static inline void swap_shmem_alloc(swp_entry_t swp, int nr) { } -- cgit v1.2.3

From 809bc86517cc408b5b8cb8e08e69096639432bc8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 Aug 2024 15:42:10 +0800 Subject: mm: shmem: support large folio swap out

Shmem will support large folio allocation [1] [2] to get better performance; however, memory reclaim still splits the precious large folios when trying to swap out shmem, which may lead to memory fragmentation and means shmem cannot take advantage of large folios. Moreover, the swap code already supports swapping out large folios without splitting them, hence this patch set supports large folio swap-out for shmem.

Note that the i915_gem_shmem driver still needs the folio to be split when swapping, thus add a new flag 'split_large_folio' for writeback_control to indicate splitting the large folio.
[1] https://lore.kernel.org/all/cover.1717495894.git.baolin.wang@linux.alibaba.com/ [2] https://lore.kernel.org/all/20240515055719.32577-1-da.gomez@samsung.com/ [hughd@google.com: shmem_writepage() split folio at EOF before swapout] Link: https://lkml.kernel.org/r/aef55f8d-6040-692d-65e3-16150cce4440@google.com [baolin.wang@linux.alibaba.com: remove the wbc->split_large_folio per Hugh] Link: https://lkml.kernel.org/r/1236a002daa301b3b9ba73d6c0fab348427cf295.1724833399.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/d80c21abd20e1b0f5ca66b330f074060fb2f082d.1723434324.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Signed-off-by: Hugh Dickins Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Lance Yang Cc: Matthew Wilcox Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/writeback.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1a54676d843a..51278327b3c6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,6 +79,9 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* Target list for splitting a large folio */ + struct list_head *list; + /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; -- cgit v1.2.3 From f77f0c7514789577125c1b2df145703161736359 Mon Sep 17 00:00:00 2001 From: Kaiyang Zhao Date: Wed, 14 Aug 2024 17:42:27 +0000 Subject: mm,memcg: provide per-cgroup counters for NUMA balancing operations The ability to observe the demotion and promotion decisions made by the kernel on a per-cgroup basis is important for monitoring and tuning containerized workloads on machines equipped with tiered memory. Different containers in the system may experience drastically different memory tiering actions that cannot be distinguished from the global counters alone. For example, a container running a workload that has a much hotter memory accesses will likely see more promotions and fewer demotions, potentially depriving a colocated container of top tier memory to such an extent that its performance degrades unacceptably. For another example, some containers may exhibit longer periods between data reuse, causing much more numa_hint_faults than numa_pages_migrated. In this case, tuning hot_threshold_ms may be appropriate, but the signal can easily be lost if only global counters are available. In the long term, we hope to introduce per-cgroup control of promotion and demotion actions to implement memory placement policies in tiering. This patch set adds seven counters to memory.stat in a cgroup: numa_pages_migrated, numa_pte_updates, numa_hint_faults, pgdemote_kswapd, pgdemote_khugepaged, pgdemote_direct and pgpromote_success. pgdemote_* and pgpromote_success are also available in memory.numa_stat. count_memcg_events_mm() is added to count multiple event occurrences at once, and get_mem_cgroup_from_folio() is added because we need to get a reference to the memcg of a folio before it's migrated to track numa_pages_migrated. The accounting of PGDEMOTE_* is moved to shrink_inactive_list() before being changed to per-cgroup. 
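Illustrative use of the batched helper mentioned above (a sketch only; the exact call sites live in the NUMA balancing paths, and the variables here are assumptions):

        /* Sketch: charge a whole batch of NUMA hint faults to the mm's memcg
         * in one call instead of looping over count_memcg_event_mm().
         */
        count_memcg_events_mm(vma->vm_mm, NUMA_HINT_FAULTS, nr_pages);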
[kaiyang2@cs.cmu.edu: add documentation of the memcg counters in cgroup-v2.rst] Link: https://lkml.kernel.org/r/20240814235122.252309-1-kaiyang2@cs.cmu.edu Link: https://lkml.kernel.org/r/20240814174227.30639-1-kaiyang2@cs.cmu.edu Signed-off-by: Kaiyang Zhao Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Wei Xu Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 24 +++++++++++++++++++++--- include/linux/vmstat.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ed170399179a..fe05fdb92779 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -784,6 +784,8 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_current(void); +struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -1028,8 +1030,8 @@ static inline void count_memcg_folio_events(struct folio *folio, count_memcg_events(memcg, idx, nr); } -static inline void count_memcg_event_mm(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) { struct mem_cgroup *memcg; @@ -1039,10 +1041,16 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) - count_memcg_events(memcg, idx, 1); + count_memcg_events(memcg, idx, count); rcu_read_unlock(); } +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) +{ + count_memcg_events_mm(mm, idx, 1); +} + static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { @@ -1262,6 +1270,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_current(void) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1484,6 +1497,11 @@ static inline void count_memcg_folio_events(struct folio *folio, { } +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) +{ +} + static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9eb77c9007e6..d2761bf8ff32 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -32,6 +32,7 @@ struct reclaim_stat { unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; + unsigned nr_demoted; }; /* Stat data for system wide items */ -- cgit v1.2.3 From e98337d11bbdfa3e3f0fb99aa93e40f97549e0cd Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:49 -0600 Subject: mm/contig_alloc: support __GFP_COMP Patch series "mm/hugetlb: alloc/free gigantic folios", v2. Use __GFP_COMP for gigantic folios can greatly reduce not only the amount of code but also the allocation and free time. 
Approximate LOC to mm/hugetlb.c: +60, -240 Allocate and free 500 1GB hugeTLB memory without HVO by: time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages Before After Alloc ~13s ~10s Free ~15s <1s The above magnitude generally holds for multiple x86 and arm64 CPU models. Perf profile before: Alloc - 99.99% alloc_pool_huge_folio - __alloc_fresh_hugetlb_folio - 83.23% alloc_contig_pages_noprof - 47.46% alloc_contig_range_noprof - 20.96% isolate_freepages_range 16.10% split_page - 14.10% start_isolate_page_range - 12.02% undo_isolate_page_range Free - update_and_free_pages_bulk - 87.71% free_contig_range - 76.02% free_unref_page - 41.30% free_unref_page_commit - 32.58% free_pcppages_bulk - 24.75% __free_one_page 13.96% _raw_spin_trylock 12.27% __update_and_free_hugetlb_folio Perf profile after: Alloc - 99.99% alloc_pool_huge_folio alloc_gigantic_folio - alloc_contig_pages_noprof - 59.15% alloc_contig_range_noprof - 20.72% start_isolate_page_range 20.64% prep_new_page - 17.13% undo_isolate_page_range Free - update_and_free_pages_bulk - __folio_put - __free_pages_ok 7.46% free_tail_page_prepare - 1.97% free_one_page 1.86% __free_one_page This patch (of 3): Support __GFP_COMP in alloc_contig_range(). When the flag is set, upon success the function returns a large folio prepared by prep_new_page(), rather than a range of order-0 pages prepared by split_free_pages() (which is renamed from split_map_pages()). alloc_contig_range() can be used to allocate folios larger than MAX_PAGE_ORDER, e.g., gigantic hugeTLB folios. So on the free path, free_one_page() needs to handle that by split_large_buddy(). [akpm@linux-foundation.org: fix folio_alloc_gigantic_noprof() WARN expression, per Yu Liao] Link: https://lkml.kernel.org/r/20240814035451.773331-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20240814035451.773331-2-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Zi Yan Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Frank van der Linden Signed-off-by: Andrew Morton --- include/linux/gfp.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f53f76e0b17e..03ba9563c6db 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -446,4 +446,27 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_ #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); +#ifdef CONFIG_CONTIG_ALLOC +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + struct page *page; + + if (WARN_ON(!order || !(gfp & __GFP_COMP))) + return NULL; + + page = alloc_contig_pages_noprof(1 << order, gfp, nid, node); + + return page ? page_folio(page) : NULL; +} +#else +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + return NULL; +} +#endif +/* This should be paired with folio_put() rather than free_contig_range(). */ +#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) + #endif /* __LINUX_GFP_H */ -- cgit v1.2.3 From 463586e9ff398f951a9a63d98a3b93f44434d20c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:50 -0600 Subject: mm/cma: add cma_{alloc,free}_folio() With alloc_contig_range() and free_contig_range() supporting large folios, CMA can allocate and free large folios too, by cma_alloc_folio() and cma_free_folio(). 
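Illustrative usage of the new helpers (a sketch; whether the caller must pass __GFP_COMP explicitly is an assumption here, and error handling is minimal):

        struct folio *folio;

        folio = cma_alloc_folio(cma, order, GFP_KERNEL | __GFP_COMP);
        if (!folio)
                return -ENOMEM;

        /* ... use the large folio ... */

        if (!cma_free_folio(cma, folio))
                pr_warn("folio did not come from this CMA area\n");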
[yuzhao@google.com: fix WARN in cma_alloc_folio()] Link: https://lkml.kernel.org/r/Zsd0PgAQmbpR8jS6@google.com Link: https://lkml.kernel.org/r/20240814035451.773331-3-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Zi Yan Cc: Frank van der Linden Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/cma.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index 9db877506ea8..d15b64f51336 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -52,4 +52,20 @@ extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); extern void cma_reserve_pages_on_error(struct cma *cma); + +#ifdef CONFIG_CMA +struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); +bool cma_free_folio(struct cma *cma, const struct folio *folio); +#else +static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) +{ + return NULL; +} + +static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) +{ + return false; +} +#endif + #endif -- cgit v1.2.3 From cf54f310d0d313bce6505d010a555948b0ae0e2a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Aug 2024 21:54:51 -0600 Subject: mm/hugetlb: use __GFP_COMP for gigantic folios Use __GFP_COMP for gigantic folios to greatly reduce not only the amount of code but also the allocation and free time. LOC (approximately): +60, -240 Allocate and free 500 1GB hugeTLB memory without HVO by: time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages Before After Alloc ~13s ~10s Free ~15s <1s The above magnitude generally holds for multiple x86 and arm64 CPU models. Link: https://lkml.kernel.org/r/20240814035451.773331-4-yuzhao@google.com Signed-off-by: Yu Zhao Reported-by: Frank van der Linden Acked-by: Zi Yan Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3100a52ceb73..98c47c394b89 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -896,10 +896,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; + gfp_t gfp = __GFP_COMP | __GFP_NOWARN; + + gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + + return gfp; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) -- cgit v1.2.3 From d0b003ce97ad6518dea4b0ed70081df95de04179 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 Aug 2024 12:32:46 +0200 Subject: mm/rmap: use folio->_mapcount for small folios We have some cases left whereby we operate on small folios and still refer to page->_mapcount. Let's just use folio->_mapcount instead, which currently still overlays page->_mapcount, so no change. This change will make it easier to later spot any remaining users of page->_mapcount that target tail pages. 
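For context, this is purely cosmetic today because folio->_mapcount overlays the head page's _mapcount; an illustrative assertion of that invariant (not part of the patch) could look like:

  static inline void assert_folio_mapcount_overlay(struct folio *folio)
  {
          /* Only small (single-page) folios use this counter directly. */
          VM_WARN_ON_FOLIO(folio_test_large(folio), folio);

          /* Both names refer to the same storage while the overlay exists. */
          WARN_ON_ONCE((void *)&folio->_mapcount !=
                       (void *)&folio->page._mapcount);
  }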
Link: https://lkml.kernel.org/r/20240816103246.719209-1-david@redhat.com Signed-off-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 0978c64f49d8..91b5935e8485 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -331,7 +331,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, switch (level) { case RMAP_LEVEL_PTE: if (!folio_test_large(folio)) { - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } @@ -425,7 +425,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, if (!folio_test_large(folio)) { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } -- cgit v1.2.3 From 489a744e5fb16503b495ad1fa3dd1abf25b224e7 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 13 Aug 2024 00:34:35 +0200 Subject: mm: krealloc: clarify valid usage of __GFP_ZERO Properly document that if __GFP_ZERO logic is requested, callers must ensure that, starting with the initial memory allocation, every subsequent call to this API for the same memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that __GFP_ZERO is not fully honored by this API. Link: https://lkml.kernel.org/r/20240812223707.32049-2-dakr@kernel.org Signed-off-by: Danilo Krummrich Acked-by: David Rientjes Cc: Christoph Lameter Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/slab.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index c9cb42203183..2282e67a01c7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -733,6 +733,16 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz * @new_n: new number of elements to alloc * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) + * + * If __GFP_ZERO logic is requested, callers must ensure that, starting with the + * initial memory allocation, every subsequent call to this API for the same + * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that + * __GFP_ZERO is not fully honored by this API. + * + * See krealloc_noprof() for further details. + * + * In any case, the contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. */ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, size_t new_n, -- cgit v1.2.3 From a759e37fb46708029c9c3c56c3b62e6f24d85cf5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 18 Aug 2024 23:01:51 +0200 Subject: err.h: add ERR_PTR_PCPU(), PTR_ERR_PCPU() and IS_ERR_PCPU() macros Add ERR_PTR_PCPU(), PTR_ERR_PCPU() and IS_ERR_PCPU() macros that operate on pointers in the percpu address space. These macros remove the need for (__force void *) function argument casts (to avoid sparse -Wcast-from-as warnings). The patch will also avoid future build errors due to pointer address space mismatch with enabled strict percpu address space checks. 
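A hypothetical usage sketch (not from the patch) of an allocator that reports failure through the percpu address space:

  static int __percpu *alloc_counters(void)
  {
          int __percpu *ctrs = alloc_percpu(int);

          if (!ctrs)
                  return ERR_PTR_PCPU(-ENOMEM);

          return ctrs;
  }

  /* at a call site */
  int __percpu *ctrs = alloc_counters();

  if (IS_ERR_PCPU(ctrs))
          return PTR_ERR_PCPU(ctrs);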
Link: https://lkml.kernel.org/r/20240818210235.33481-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Catalin Marinas Signed-off-by: Andrew Morton --- include/linux/err.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/err.h b/include/linux/err.h index b5d9bb2a2349..a4dacd745fcf 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -41,6 +41,9 @@ static inline void * __must_check ERR_PTR(long error) return (void *) error; } +/* Return the pointer in the percpu address space. */ +#define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) + /** * PTR_ERR - Extract the error code from an error pointer. * @ptr: An error pointer. @@ -51,6 +54,9 @@ static inline long __must_check PTR_ERR(__force const void *ptr) return (long) ptr; } +/* Read an error pointer from the percpu address space. */ +#define PTR_ERR_PCPU(ptr) (PTR_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR - Detect an error pointer. * @ptr: The pointer to check. @@ -61,6 +67,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +/* Read an error pointer from the percpu address space. */ +#define IS_ERR_PCPU(ptr) (IS_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR_OR_NULL - Detect an error pointer or a null pointer. * @ptr: The pointer to check. -- cgit v1.2.3 From ef5f379de302884b9b7ad9b62587a942a9f0bb55 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 20 Aug 2024 14:22:10 +0200 Subject: mm: always inline _compound_head() with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y We already force-inline page_fixed_fake_head(), page_is_fake_head() and PageTail(), however the compiler might decide that _compound_head() is not worthy to be inlined, because of page_fixed_fake_head(). The result is that, for example, PageAnonExclusive() now might involve a function call when checking PageHuge(), which performs a page_folio()->_compound_head() call. This can lead to a slight regression of the stress-ng.clone benchmark. This is not super-urgent to fix, but always inlining _compound_head() seems like the obvious thing to do for this primitive, similar to the other ones. This change restores the slight regression and a compilation with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y shows no relevant bloat [2]: add/remove: 15/14 grow/shrink: 79/87 up/down: 12836/-13917 (-1081) ... 
Total: Before=32786363, After=32785282, chg -0.00% [1] https://lkml.kernel.org/r/817150f2-abf7-430f-9973-540bd6cdd26f@intel.com [2] https://lore.kernel.org/all/116e117c-2821-401d-8e62-b85cdec37f4a@redhat.com/ Link: https://lkml.kernel.org/r/20240820122210.660140-1-david@redhat.com Fixes: c0bff412e67b ("mm: allow anon exclusive check over hugetlb tail pages") Signed-off-by: David Hildenbrand Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202407301049.5051dc19-oliver.sang@intel.com Tested-by: Yin Fengwei Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 17175a328e6b..2515ae85f191 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -232,7 +232,7 @@ static __always_inline int page_is_fake_head(const struct page *page) return page_fixed_fake_head(page) != page; } -static inline unsigned long _compound_head(const struct page *page) +static __always_inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); -- cgit v1.2.3 From 0a2d82946be67e02fdf85a4010606bdc0546ba44 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 20 Aug 2024 10:26:39 +0800 Subject: mm: allow read-ahead with IOCB_NOWAIT set Readahead support for IOCB_NOWAIT was introduced in commit 2e85abf053b9 ("mm: allow read-ahead with IOCB_NOWAIT set"). However, this implementation broke the semantics of IOCB_NOWAIT by potentially causing it to wait on I/O during memory reclamation. This behavior was later modified in commit efa8480a8316 ("fs: RWF_NOWAIT should imply IOCB_NOIO"). To resolve the blocking issue during memory reclamation, we can use memalloc_noio_{save,restore} to ensure non-blocking behavior. This change restores the original functionality, allowing preadv2(IOCB_NOWAIT) to trigger readahead if the file content is not present in the page cache. While this process may trigger direct memory reclamation, the __GFP_NORETRY flag is set in the readahead GFP flags, ensuring it won't block. A use case for this change is when we want to trigger readahead in the preadv2(2) syscall if the file cache is absent, but without waiting for certain filesystem locks, like xfs_ilock. A simple example is as follows: retry: if (preadv2(fd, iovec, cnt, offset, RWF_NOWAIT) < 0) { do_other_work(); goto retry; } Link: https://lore.gnuweeb.org/io-uring/20200624164127.GP21350@casper.infradead.org/ Link: https://lkml.kernel.org/r/20240820022639.89562-1-laoar.shao@gmail.com Signed-off-by: Yafang Shao Cc: Jens Axboe Cc: Matthew Wilcox Cc: Dave Chinner Cc: Jan Kara Cc: Christian Brauner Signed-off-by: Andrew Morton --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..8610242ec2ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3461,7 +3461,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOIO; } if (flags & RWF_ATOMIC) { if (rw_type != WRITE) -- cgit v1.2.3 From e27ad6560e4b5993315b56d6884ca5a4652468f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:09 +0100 Subject: printf: remove %pGt support Patch series "Increase the number of bits available in page_type". 
Kent wants more than 16 bits in page_type, so I resurrected this old patch and expanded it a bit. It's a bit more efficient than our current scheme (1 4-byte insn vs 3 insns of 13 bytes total) to test a single page type. This patch (of 4): An upcoming patch will convert page type from being a bitfield to a single byte, so we will not be able to use %pG to print the page type any more. The printing of the symbolic name will be restored in that patch. Link: https://lkml.kernel.org/r/20240821173914.2270383-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240821173914.2270383-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b5c4da370a50..c151cc21d367 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -130,16 +130,6 @@ IF_HAVE_PG_ARCH_X(arch_3) __def_pageflag_names \ ) : "none" -#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } - -#define __def_pagetype_names \ - DEF_PAGETYPE_NAME(slab), \ - DEF_PAGETYPE_NAME(hugetlb), \ - DEF_PAGETYPE_NAME(offline), \ - DEF_PAGETYPE_NAME(guard), \ - DEF_PAGETYPE_NAME(table), \ - DEF_PAGETYPE_NAME(buddy) - #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) -- cgit v1.2.3 From e880034cf7182aa35962bd30dfc9fa60476d56a4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:10 +0100 Subject: mm: introduce page_mapcount_is_type() Resolve the awkward "and add one to this opaque constant" test into a self-documenting inline function. 
Link: https://lkml.kernel.org/r/20240821173914.2270383-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +-- include/linux/page-flags.h | 12 +++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0fde51c0532f..6f6053d7ea6f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1228,8 +1228,7 @@ static inline int folio_mapcount(const struct folio *folio) if (likely(!folio_test_large(folio))) { mapcount = atomic_read(&folio->_mapcount) + 1; - /* Handle page_has_type() pages */ - if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + if (page_mapcount_is_type(mapcount)) mapcount = 0; return mapcount; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2515ae85f191..0b5484fdbca1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -956,12 +956,18 @@ enum pagetype { #define folio_test_type(folio, flag) \ ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -static inline int page_type_has_type(unsigned int page_type) +static inline bool page_type_has_type(int page_type) { - return (int)page_type < PAGE_MAPCOUNT_RESERVE; + return page_type < PAGE_MAPCOUNT_RESERVE; } -static inline int page_has_type(const struct page *page) +/* This takes a mapcount which is one more than page->_mapcount */ +static inline bool page_mapcount_is_type(unsigned int mapcount) +{ + return page_type_has_type(mapcount - 1); +} + +static inline bool page_has_type(const struct page *page) { return page_type_has_type(READ_ONCE(page->page_type)); } -- cgit v1.2.3 From 4ffca5a96678c9fe1a39ec1e9c8fd326b57ea8a7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:11 +0100 Subject: mm: support only one page_type per page By using a few values in the top byte, users of page_type can store up to 24 bits of additional data in page_type. It also reduces the code size as (with replacement of READ_ONCE() with data_race()), the kernel can check just a single byte. eg: ffffffff811e3a79: 8b 47 30 mov 0x30(%rdi),%eax ffffffff811e3a7c: 55 push %rbp ffffffff811e3a7d: 48 89 e5 mov %rsp,%rbp ffffffff811e3a80: 25 00 00 00 82 and $0x82000000,%eax ffffffff811e3a85: 3d 00 00 00 80 cmp $0x80000000,%eax ffffffff811e3a8a: 74 4d je ffffffff811e3ad9 becomes: ffffffff811e3a69: 80 7f 33 f5 cmpb $0xf5,0x33(%rdi) ffffffff811e3a6d: 55 push %rbp ffffffff811e3a6e: 48 89 e5 mov %rsp,%rbp ffffffff811e3a71: 74 4d je ffffffff811e3ac0 replacing three instructions with one. [wangkefeng.wang@huawei.com: fix ubsan warnings] Link: https://lkml.kernel.org/r/2d19c48a-c550-4345-bf36-d05cd303c5de@huawei.com Link: https://lkml.kernel.org/r/20240821173914.2270383-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 68 +++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b5484fdbca1..7a1b2948b075 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -923,42 +923,29 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * For pages that are never mapped to userspace, - * page_type may be used. 
Because it is initialised to -1, we invert the - * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and - * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of _mapcount won't be - * mistaken for a page type value. + * For pages that do not use mapcount, page_type may be used. + * The low 24 bits of pagetype may be used for your own purposes, as long + * as you are careful to not affect the top 8 bits. The low bits of + * pagetype will be overwritten when you clear the page_type from the page. */ - enum pagetype { - PG_buddy = 0x40000000, - PG_offline = 0x20000000, - PG_table = 0x10000000, - PG_guard = 0x08000000, - PG_hugetlb = 0x04000000, - PG_slab = 0x02000000, - PG_zsmalloc = 0x01000000, - PG_unaccepted = 0x00800000, - - PAGE_TYPE_BASE = 0x80000000, - - /* - * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and - * allow owners that set a type to reuse the lower 16 bit for their own - * purposes. - */ - PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, + /* 0x00-0x7f are positive numbers, ie mapcount */ + /* Reserve 0x80-0xef for mapcount overflow. */ + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + + PGTY_mapcount_underflow = 0xff }; -#define PageType(page, flag) \ - ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -#define folio_test_type(folio, flag) \ - ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) - static inline bool page_type_has_type(int page_type) { - return page_type < PAGE_MAPCOUNT_RESERVE; + return page_type < (PGTY_mapcount_underflow << 24); } /* This takes a mapcount which is one more than page->_mapcount */ @@ -969,40 +956,41 @@ static inline bool page_mapcount_is_type(unsigned int mapcount) static inline bool page_has_type(const struct page *page) { - return page_type_has_type(READ_ONCE(page->page_type)); + return page_mapcount_is_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -static __always_inline bool folio_test_##fname(const struct folio *folio)\ +static __always_inline bool folio_test_##fname(const struct folio *folio) \ { \ - return folio_test_type(folio, PG_##lname); \ + return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ + VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ + folio); \ + folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ + folio->page.page_type = UINT_MAX; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ - return PageType(page, PG_##lname); \ + return data_race(page->page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ - page->page_type &= ~PG_##lname; \ + VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ + page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ 
VM_BUG_ON_PAGE(!Page##uname(page), page); \ - page->page_type |= PG_##lname; \ + page->page_type = UINT_MAX; \ } /* -- cgit v1.2.3 From bf03c806993091d8fb2fc0e98f07a8ef251c78f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:34 +0100 Subject: mm: remove PageActive Patch series "Simplify the page flags a little". In the course of our folio conversions, we have made many page flags only used on folios, so we can now remove the page-based accessors. This should cut down compile time a little, and prevent new users from cropping up. There is more that could be done in this area, but it would produce merge conflicts, so I'll sit on those patches until next merge window. We now have line of sight to removing PG_private_2 and PG_private. This patch (of 10): This flag is now only used on folios, so we can remove all the page accessors. [akpm@linux-foundation.org: fix arch/powerpc/mm/pgtable-frag.c] Link: https://lkml.kernel.org/r/20240821193445.2294269-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240821193445.2294269-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 7a1b2948b075..34347bee1217 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -510,8 +510,9 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) -PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) - TESTCLEARFLAG(Active, active, PF_HEAD) +FOLIO_FLAG(active, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ -- cgit v1.2.3 From 0b7582803649e0e58e5f8c9a4ede362777bc7eec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:35 +0100 Subject: mm: remove PageSwapBacked This flag is now only used on folios, so we can remove all the page accessors. 
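As a reminder of the calling convention that remains after these conversions, FOLIO_FLAG()/__FOLIO_SET_FLAG()/__FOLIO_CLEAR_FLAG() keep generating the folio_* accessors while the Page* wrappers disappear; a purely illustrative sketch, assuming a folio is already in hand:

  static void folio_flag_usage_sketch(struct folio *folio)
  {
          __folio_set_swapbacked(folio);          /* non-atomic set */

          if (folio_test_swapbacked(folio))       /* test */
                  folio_set_active(folio);        /* atomic set */
  }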
Link: https://lkml.kernel.org/r/20240821193445.2294269-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 34347bee1217..cebb3dc0fd0c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -528,9 +528,9 @@ PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) -PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) +FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE) /* * Private page markings that may be used by the filesystem that owns the page -- cgit v1.2.3 From 6f394ee9ddb4bd1879e42c9c8648a37f650bea99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:36 +0100 Subject: mm: remove PageReadahead This flag is now only used on folios, so we can remove all the page accessors. Link: https://lkml.kernel.org/r/20240821193445.2294269-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index cebb3dc0fd0c..71444223d630 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -553,8 +553,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) +FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.3 From 32f51ead3d7771cdec29f75e08d50a76d2c6253d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:37 +0100 Subject: mm: remove PageSwapCache This flag is now only used on folios, so we can remove all the page accessors and reword the comments that refer to them. Link: https://lkml.kernel.org/r/20240821193445.2294269-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- include/linux/page-flags.h | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2419e60c9a7f..6e3bdf8e38bc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -109,7 +109,7 @@ struct page { /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. - * Used for swp_entry_t if PageSwapCache. + * Used for swp_entry_t if swapcache flag set. * Indicates order in the buddy system if PageBuddy. 
*/ unsigned long private; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 71444223d630..b31d2c50b6f0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -574,15 +574,10 @@ static __always_inline bool folio_test_swapcache(const struct folio *folio) test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(const struct page *page) -{ - return folio_test_swapcache(page_folio(page)); -} - -SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) -CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) +FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE) +FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(SwapCache, swapcache) +FOLIO_FLAG_FALSE(swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) -- cgit v1.2.3 From cb29e7941d5d57576392a52c08077ef327ec8e17 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:38 +0100 Subject: mm: remove PageUnevictable There is only one caller of PageUnevictable() left; convert it to call folio_test_unevictable() and remove all the page accessors. Link: https://lkml.kernel.org/r/20240821193445.2294269-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b31d2c50b6f0..2150502195d5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -580,9 +580,9 @@ FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) FOLIO_FLAG_FALSE(swapcache) #endif -PAGEFLAG(Unevictable, unevictable, PF_HEAD) - __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) - TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) +FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) -- cgit v1.2.3 From 99f86bbda31790f2ca0e365f02650585536929ab Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:39 +0100 Subject: mm: remove PageMlocked This flag is now only used on folios, so we can remove all the page accessors. 
Link: https://lkml.kernel.org/r/20240821193445.2294269-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2150502195d5..08cce16c82aa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -585,12 +585,15 @@ FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU -PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) +FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) - TESTSCFLAG_FALSE(Mlocked, mlocked) +FOLIO_FLAG_FALSE(mlocked) + __FOLIO_CLEAR_FLAG_NOOP(mlocked) + FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked) + FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED -- cgit v1.2.3 From 3026bc1e82b695d69cde21712512922abe74aab9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:40 +0100 Subject: mm: remove PageOwnerPriv1 While there are many aliases for this flag, nobody actually uses the *PageOwnerPriv1() nor folio_*_owner_priv_1() accessors. Remove them. Link: https://lkml.kernel.org/r/20240821193445.2294269-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 08cce16c82aa..458d2dc5124d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -539,8 +539,6 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) */ PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) -PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) - TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) /* * Only test-and-set exist for PG_writeback. The unconditional operators are -- cgit v1.2.3 From 6dc151388e44957d471633ce70d72e3d27ae836d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:41 +0100 Subject: mm: remove page_has_private() This function has no more callers, except folio_has_private(). Combine the two functions. Link: https://lkml.kernel.org/r/20240821193445.2294269-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 458d2dc5124d..017205048cab 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1175,20 +1175,15 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** - * page_has_private - Determine if page has private stuff - * @page: The page to be checked + * folio_has_private - Determine if folio has private stuff + * @folio: The folio to be checked * - * Determine if a page has private stuff, indicating that release routines + * Determine if a folio has private stuff, indicating that release routines * should be invoked upon it. 
*/ -static inline int page_has_private(const struct page *page) +static inline int folio_has_private(const struct folio *folio) { - return !!(page->flags & PAGE_FLAGS_PRIVATE); -} - -static inline bool folio_has_private(const struct folio *folio) -{ - return page_has_private(&folio->page); + return !!(folio->flags & PAGE_FLAGS_PRIVATE); } #undef PF_ANY -- cgit v1.2.3 From 02e1960aafac33721401dcd92e915325fdb524b2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:42 +0100 Subject: mm: rename PG_mappedtodisk to PG_owner_2 This flag has similar constraints to PG_owner_priv_1 -- it is ignored by core code, and is entirely for the use of the code which allocated the folio. Since the pagecache does not use it, individual filesystems can use it. The bufferhead code does use it, so filesystems which use the buffer cache must not use it for another purpose. Link: https://lkml.kernel.org/r/20240821193445.2294269-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/kernel-page-flags.h | 2 +- include/linux/page-flags.h | 24 ++++++++++++++++-------- include/trace/events/mmflags.h | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 859f4b0c1b2b..7c587a711be1 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -10,7 +10,7 @@ */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 +#define KPF_OWNER_2 34 #define KPF_PRIVATE 35 #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 017205048cab..1ff3d172c22c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -101,12 +101,12 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ + PG_owner_2, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ @@ -131,6 +131,11 @@ enum pageflags { PG_readahead = PG_reclaim, + /* Anonymous memory (and shmem) */ + PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* Some filesystems */ + PG_checked = PG_owner_priv_1, + /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped @@ -138,13 +143,13 @@ enum pageflags { * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ - PG_anon_exclusive = PG_mappedtodisk, - - /* Filesystems */ - PG_checked = PG_owner_priv_1, + PG_anon_exclusive = PG_owner_2, - /* SwapBacked */ - PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* + * Set if all buffer heads in the folio are mapped. + * Filesystems which do not use BHs can use it for their own purpose. + */ + PG_mappedtodisk = PG_owner_2, /* Two page bits are conscripted by FS-Cache to maintain local caching * state. 
These bits are set on pages belonging to the netfs's inodes @@ -540,6 +545,9 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) +/* owner_2 can be set on tail pages for anon memory */ +FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) + /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index c151cc21d367..3b51558cdc9b 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -107,13 +107,13 @@ DEF_PAGEFLAG_NAME(active), \ DEF_PAGEFLAG_NAME(workingset), \ DEF_PAGEFLAG_NAME(owner_priv_1), \ + DEF_PAGEFLAG_NAME(owner_2), \ DEF_PAGEFLAG_NAME(arch_1), \ DEF_PAGEFLAG_NAME(reserved), \ DEF_PAGEFLAG_NAME(private), \ DEF_PAGEFLAG_NAME(private_2), \ DEF_PAGEFLAG_NAME(writeback), \ DEF_PAGEFLAG_NAME(head), \ - DEF_PAGEFLAG_NAME(mappedtodisk), \ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ -- cgit v1.2.3 From 7a87225ae2c6c317c7b80cf599e5cf0eee699196 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:43 +0100 Subject: x86: remove PG_uncached Convert x86 to use PG_arch_2 instead of PG_uncached and remove PG_uncached. Link: https://lkml.kernel.org/r/20240821193445.2294269-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/kernel-page-flags.h | 1 - include/linux/page-flags.h | 13 +++---------- include/trace/events/mmflags.h | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 7c587a711be1..196778a087c4 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -15,7 +15,6 @@ #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 #define KPF_ARCH 38 -#define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 #define KPF_ARCH_3 42 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1ff3d172c22c..2175ebceb41c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -113,9 +113,6 @@ enum pageflags { #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED - PG_uncached, /* Page has been mapped as uncached */ -#endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. 
Don't touch */ #endif @@ -123,8 +120,10 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 PG_arch_2, +#endif +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 PG_arch_3, #endif __NR_PAGEFLAGS, @@ -602,12 +601,6 @@ FOLIO_FLAG_FALSE(mlocked) FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) -#else -PAGEFLAG_FALSE(Uncached, uncached) -#endif - #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 3b51558cdc9b..58f2699331b6 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -71,12 +71,6 @@ #define IF_HAVE_PG_MLOCK(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -#define IF_HAVE_PG_UNCACHED(_name) ,{1UL << PG_##_name, __stringify(_name)} -#else -#define IF_HAVE_PG_UNCACHED(_name) -#endif - #ifdef CONFIG_MEMORY_FAILURE #define IF_HAVE_PG_HWPOISON(_name) ,{1UL << PG_##_name, __stringify(_name)} #else @@ -89,10 +83,16 @@ #define IF_HAVE_PG_IDLE(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X -#define IF_HAVE_PG_ARCH_X(_name) ,{1UL << PG_##_name, __stringify(_name)} +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 +#define IF_HAVE_PG_ARCH_2(_name) ,{1UL << PG_##_name, __stringify(_name)} +#else +#define IF_HAVE_PG_ARCH_2(_name) +#endif + +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 +#define IF_HAVE_PG_ARCH_3(_name) ,{1UL << PG_##_name, __stringify(_name)} #else -#define IF_HAVE_PG_ARCH_X(_name) +#define IF_HAVE_PG_ARCH_3(_name) #endif #define DEF_PAGEFLAG_NAME(_name) { 1UL << PG_##_name, __stringify(_name) } @@ -118,12 +118,11 @@ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ IF_HAVE_PG_MLOCK(mlocked) \ -IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ IF_HAVE_PG_IDLE(idle) \ IF_HAVE_PG_IDLE(young) \ -IF_HAVE_PG_ARCH_X(arch_2) \ -IF_HAVE_PG_ARCH_X(arch_3) +IF_HAVE_PG_ARCH_2(arch_2) \ +IF_HAVE_PG_ARCH_3(arch_3) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ -- cgit v1.2.3 From 0ca0c24e3211586d44a31b3c4767fe3f43a008a7 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 23 Aug 2024 20:04:39 +0100 Subject: mm: store zero pages to be swapped out in a bitmap Patch series "mm: store zero pages to be swapped out in a bitmap", v8. As shown in the patch series that introduced the zswap same-filled optimization [1], 10-20% of the pages stored in zswap are same-filled. This is also observed across Meta's server fleet. By using VM counters in swap_writepage (not included in this patchseries) it was found that less than 1% of the same-filled pages to be swapped out are non-zero pages. For conventional swap setup (without zswap), rather than reading/writing these pages to flash resulting in increased I/O and flash wear, a bitmap can be used to mark these pages as zero at write time, and the pages can be filled at read time if the bit corresponding to the page is set. When using zswap with swap, this also means that a zswap_entry does not need to be allocated for zero filled pages resulting in memory savings which would offset the memory used for the bitmap. A similar attempt was made earlier in [2] where zswap would only track zero-filled pages instead of same-filled. This patchseries adds zero-filled pages optimization to swap (hence it can be used even if zswap is disabled) and removes the same-filled code from zswap (as only 1% of the same-filled pages are non-zero), simplifying code. 
[1] https://lore.kernel.org/all/20171018104832epcms5p1b2232e2236258de3d03d1344dde9fce0@epcms5p1/ [2] https://lore.kernel.org/lkml/20240325235018.2028408-1-yosryahmed@google.com/ This patch (of 2): Approximately 10-20% of pages to be swapped out are zero pages [1]. Rather than reading/writing these pages to flash resulting in increased I/O and flash wear, a bitmap can be used to mark these pages as zero at write time, and the pages can be filled at read time if the bit corresponding to the page is set. With this patch, NVMe writes in Meta server fleet decreased by almost 10% with conventional swap setup (zswap disabled). [1] https://lore.kernel.org/all/20171018104832epcms5p1b2232e2236258de3d03d1344dde9fce0@epcms5p1/ Link: https://lkml.kernel.org/r/20240823190545.979059-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20240823190545.979059-2-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: Chengming Zhou Reviewed-by: Yosry Ahmed Reviewed-by: Nhat Pham Acked-by: Johannes Weiner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Shakeel Butt Cc: Usama Arif Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 248db1dd7812..ca533b478c21 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -296,6 +296,7 @@ struct swap_info_struct { signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ struct list_head full_clusters; /* full clusters list */ -- cgit v1.2.3 From 63fc66f5b6b18f39269a66cf34d8cb7a24fbfe88 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 30 Aug 2024 00:00:58 -0400 Subject: ipc/shm, mm: drop do_vma_munmap() The do_vma_munmap() wrapper existed for callers that didn't have a vma iterator and needed to check the vma mseal status prior to calling the underlying munmap(). All callers now use a vma iterator and since the mseal check has been moved to do_vmi_align_munmap() and the vmas are aligned, this function can just be called instead. do_vmi_align_munmap() can no longer be static as ipc/shm is using it and it is exported via the mm.h header. Link: https://lkml.kernel.org/r/20240830040101.822209-19-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Cc: Bert Karwatzki Cc: Jeff Xu Cc: Jiri Olsa Cc: Kees Cook Cc: Lorenzo Stoakes Cc: Mark Brown Cc: Matthew Wilcox Cc: "Paul E. 
McKenney" Cc: Paul Moore Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6f6053d7ea6f..b0ff06d18c71 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3287,14 +3287,14 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr, extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); +int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU -extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf, bool unlock); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) -- cgit v1.2.3 From f1264e9531b0fbadf88e0b9c8143c546343b481c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 27 Aug 2024 19:47:27 +0800 Subject: mm: migrate: add isolate_folio_to_list() Add isolate_folio_to_list() helper to try to isolate HugeTLB, no-LRU movable and LRU folios to a list, which will be reused by do_migrate_range() from memory hotplug soon, also drop the mf_isolate_folio() since we could directly use new helper in the soft_offline_in_use_page(). Link: https://lkml.kernel.org/r/20240827114728.3212578-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Acked-by: Miaohe Lin Tested-by: Miaohe Lin Cc: Dan Carpenter Cc: Jonathan Cameron Cc: Naoya Horiguchi Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/migrate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 644be30b69c8..002e49b2ebd9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,6 +70,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, unsigned int *ret_succeeded); struct folio *alloc_migration_target(struct folio *src, unsigned long private); bool isolate_movable_page(struct page *page, isolate_mode_t mode); +bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -91,6 +92,8 @@ static inline struct folio *alloc_migration_target(struct folio *src, { return NULL; } static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) { return false; } +static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list) + { return false; } static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) -- cgit v1.2.3 From d29e741cad3f8f41df1834bf74df79380c1c6c6d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Aug 2024 17:17:04 +0200 Subject: gpio: davinci: drop platform data support There are no more any board files that use the platform data for gpio-davinci. 
We can remove the header defining it and port the code to no longer store any context in pdata. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240819151705.37258-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-davinci.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/platform_data/gpio-davinci.h (limited to 'include') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h deleted file mode 100644 index b82e44662efe..000000000000 --- a/include/linux/platform_data/gpio-davinci.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * DaVinci GPIO Platform Related Defines - * - * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ - */ - -#ifndef __DAVINCI_GPIO_PLATFORM_H -#define __DAVINCI_GPIO_PLATFORM_H - -struct davinci_gpio_platform_data { - bool no_auto_base; - u32 base; - u32 ngpio; - u32 gpio_unbanked; -}; - -/* Convert GPIO signal to GPIO pin number */ -#define GPIO_TO_PIN(bank, gpio) (16 * (bank) + (gpio)) - -#endif -- cgit v1.2.3 From 8b1451f2f723f845c05b8bad3d4c45de284338b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Sep 2024 21:54:28 -1000 Subject: sched_ext: Replace SCX_TASK_BAL_KEEP with SCX_RQ_BAL_KEEP SCX_TASK_BAL_KEEP is used by balance_one() to tell pick_next_task_scx() to keep running the current task. It's not really a task property. Replace it with SCX_RQ_BAL_KEEP which resides in rq->scx.flags and is a better fit for the usage. Also, the existing clearing rule is unnecessarily strict and makes it difficult to use with core-sched. Just clear it on entry to balance_one(). Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 69f68e2121a8..db2a266113ac 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -71,7 +71,6 @@ struct scx_dispatch_q { /* scx_entity.flags */ enum scx_ent_flags { SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ - SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */ SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ -- cgit v1.2.3 From 47f218d108950984b24af81f66356ceda380eb74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:17 -0300 Subject: iommu/amd: Store the nid in io_pgtable_cfg instead of the domain We already have memory in the union here that is being wasted in AMD's case, use it to store the nid. Putting the nid here further isolates the io_pgtable code from the struct protection_domain. Fixup protection_domain_alloc so that the NID from the device is provided, at this point dev is never NULL for AMD so this will now allocate the first table pointer on the correct NUMA node. 
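A sketch of how the io-pgtable code can honour the stored node when allocating table memory (illustrative helper, not the driver's actual function):

  static void *alloc_pgtable_page(struct io_pgtable_cfg *cfg, gfp_t gfp)
  {
          struct page *page;

          /* First-touch the page table memory on the device's NUMA node. */
          page = alloc_pages_node(cfg->amd.nid, gfp | __GFP_ZERO, 0);

          return page ? page_address(page) : NULL;
  }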
Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/8-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index f9a81761bfce..b1ecfc3cd5bc 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -171,6 +171,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + int nid; + } amd; }; }; -- cgit v1.2.3 From b6ecc662037694488bfff7c9fd21c405df8411f2 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Mon, 2 Sep 2024 05:43:47 -0700 Subject: net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic. ? page_fault_oops+0x136/0x2b0 ? page_counter_cancel+0x2e/0x80 ? do_user_addr_fault+0x2f2/0x640 ? refill_obj_stock+0xc4/0x110 ? exc_page_fault+0x71/0x160 ? asm_exc_page_fault+0x27/0x30 ? __mmdrop+0x10/0x180 ? __mmdrop+0xec/0x180 ? hrtimer_active+0xd/0x50 hrtimer_try_to_cancel+0x2c/0xf0 hrtimer_cancel+0x15/0x30 napi_disable+0x65/0x90 mana_destroy_rxq+0x4c/0x2f0 mana_create_rxq.isra.0+0x56c/0x6d0 ? mana_uncfg_vport+0x50/0x50 mana_alloc_queues+0x21b/0x320 ? skb_dequeue+0x5f/0x80 Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Signed-off-by: David S. Miller --- include/net/mana/mana.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 7caa334f4888..b8a6c7504ee1 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -98,6 +98,8 @@ struct mana_txq { atomic_t pending_sends; + bool napi_initialized; + struct mana_stats_tx stats; }; -- cgit v1.2.3 From 9f82f15ddfdd60bb9820f09737333b2345e22ab3 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:10:46 +0100 Subject: mm: use ARCH_PKEY_BITS to define VM_PKEY_BITN Use the new CONFIG_ARCH_PKEY_BITS to simplify setting these bits for different architectures. 
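The fix records whether NAPI was actually brought up for the queue and only tears it down in that case; roughly (the 'napi' pointer is illustrative, in mana the NAPI instance lives in the queue's completion queue):

  if (txq->napi_initialized) {
          napi_synchronize(napi);
          napi_disable(napi);
          netif_napi_del(napi);
          txq->napi_initialized = false;
  }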
Signed-off-by: Joey Gouly Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Acked-by: Dave Hansen Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20240822151113.1479789-4-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/linux/mm.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..fb6ccd93f589 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -330,12 +330,16 @@ extern unsigned int kobjsize(const void *objp); #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS -# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 -# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ -# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */ -# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 -# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 -#ifdef CONFIG_PPC +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +#if CONFIG_ARCH_PKEY_BITS > 3 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#else +# define VM_PKEY_BIT3 0 +#endif +#if CONFIG_ARCH_PKEY_BITS > 4 # define VM_PKEY_BIT4 VM_HIGH_ARCH_4 #else # define VM_PKEY_BIT4 0 -- cgit v1.2.3 From facaa1373c9aabf8e34109a9cb205ad0f3a8584e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:10:55 +0100 Subject: arm64: re-order MTE VM_ flags VM_PKEY_BIT[012] will use VM_HIGH_ARCH_[012], move the MTE VM flags to accommodate this. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20240822151113.1479789-13-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fb6ccd93f589..406512c16471 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -378,8 +378,8 @@ extern unsigned int kobjsize(const void *objp); #endif #if defined(CONFIG_ARM64_MTE) -# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ -# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +# define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */ #else # define VM_MTE VM_NONE # define VM_MTE_ALLOWED VM_NONE -- cgit v1.2.3 From 17519819926211e6b2834e00e4554bec0daf22ac Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2024 16:11:03 +0100 Subject: arm64/ptrace: add support for FEAT_POE Add a regset for POE containing POR_EL0. 
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20240822151113.1479789-21-joey.gouly@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b54b313bcf07..81762ff3c99e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -441,6 +441,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From c6fb88a5270f4ba24859f5ecb61cfc731ef16300 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 4 Sep 2024 21:51:50 +0900 Subject: firewire: core: allocate workqueue to handle isochronous contexts in card This commit adds a workqueue dedicated for isochronous context processing. The workqueue is allocated per instance of fw_card structure to satisfy the following characteristics descending from 1394 OHCI specification: In 1394 OHCI specification, memory pages are reserved to each isochronous context dedicated to DMA transmission. It allows to operate these per-context pages concurrently. Software can schedule hardware interrupt for several isochronous context to the same cycle, thus WQ_UNBOUND is specified. Additionally, it is sleepable to operate the content of pages, thus WQ_BH is not used. The isochronous context delivers the packets with time stamp, thus WQ_HIGHPRI is specified for semi real-time data such as IEC 61883-1/6 protocol implemented by ALSA firewire stack. The isochronous context is not used by the implementation of SCSI over IEEE1394 protocol (sbp2), thus WQ_MEM_RECLAIM is not specified. It is useful for users to adjust cpu affinity of the workqueue depending on their work loads, thus WQ_SYS is specified to expose the attributes to user space. Tested-by: Edmund Raile Link: https://lore.kernel.org/r/20240904125155.461886-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 1cca14cf5652..10e135d60824 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -134,6 +134,8 @@ struct fw_card { __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; __be32 maint_utility_register; + + struct workqueue_struct *isoc_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) -- cgit v1.2.3 From 4f55ad754d6b7b6fa4ebcfd8c50f7e53e661a78e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 4 Sep 2024 21:51:51 +0900 Subject: firewire: core: add local API to queue work item to workqueue specific to isochronous contexts In the previous commit, the workqueue is added per the instance of fw_card structure for isochronous contexts. The workqueue is designed to be used by the implementation of fw_card_driver structure underlying the fw_card. This commit adds some local APIs to be used by the implementation. 
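A hypothetical userspace sketch of reading the new regset with PTRACE_GETREGSET (treating the payload as a single u64 holding POR_EL0 is an assumption made for illustration):

  #include <stdint.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/uio.h>
  #include <linux/elf.h>

  static long read_por_el0(pid_t pid, uint64_t *por)
  {
          struct iovec iov = {
                  .iov_base = por,
                  .iov_len  = sizeof(*por),
          };

          return ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_POE, &iov);
  }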
Tested-by: Edmund Raile Link: https://lore.kernel.org/r/20240904125155.461886-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 10e135d60824..72f497b61739 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -511,6 +511,7 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; + struct work_struct work; int type; int channel; int speed; -- cgit v1.2.3 From bd07d864522e7c3e4ee364e91aee8754992f5855 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:20 +0206 Subject: printk: nbcon: Add function for printers to reacquire ownership Since ownership can be lost at any time due to handover or takeover, a printing context _must_ be prepared to back out immediately and carefully. However, there are scenarios where the printing context must reacquire ownership in order to finalize or revert hardware changes. One such example is when interrupts are disabled during printing. No other context will automagically re-enable the interrupts. For this case, the disabling context _must_ reacquire nbcon ownership so that it can re-enable the interrupts. Provide nbcon_reacquire_nobuf() for exactly this purpose. It allows a printing context to reacquire ownership using the same priority as its previous ownership. Note that after a successful reacquire the printing context will have no output buffer because that has been lost. This function cannot be used to resume printing. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-2-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 9a13f91b0c43..88050d30a9cc 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -366,6 +366,10 @@ struct console { * * The callback should allow the takeover whenever it is safe. It * increases the chance to see messages when the system is in trouble. + * If the driver must reacquire ownership in order to finalize or + * revert hardware changes, nbcon_reacquire_nobuf() can be used. + * However, on reacquire the buffer content is no longer available. A + * reacquire cannot be used to resume printing. * * The callback can be called from any context (including NMI). 
* Therefore it must avoid usage of any locking and instead rely @@ -558,12 +562,14 @@ extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else static inline void nbcon_cpu_emergency_enter(void) { } static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; -- cgit v1.2.3 From 76f258bf3f2aae570209319944703a92ac64e29e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Sep 2024 14:11:25 +0206 Subject: printk: nbcon: Introduce printer kthreads Provide the main implementation for running a printer kthread per nbcon console that is takeover/handover aware. This includes: - new mandatory write_thread() callback - kthread creation - kthread main printing loop - kthread wakeup mechanism - kthread shutdown kthread creation is a bit tricky because consoles may register before kthreads can be created. In such cases, registration will succeed, even though no kthread exists. Once kthreads can be created, an early_initcall will set @printk_kthreads_ready. If there are no registered boot consoles, the early_initcall creates the kthreads for all registered nbcon consoles. If kthread creation fails, the related console is unregistered. If there are registered boot consoles when @printk_kthreads_ready is set, no kthreads are created until the final boot console unregisters. Once kthread creation finally occurs, @printk_kthreads_running is set so that the system knows kthreads are available for all registered nbcon consoles. If @printk_kthreads_running is already set when the console is registering, the kthread is created during registration. If kthread creation fails, the registration will fail. Until @printk_kthreads_running is set, console printing occurs directly via the console_lock. kthread shutdown on system shutdown/reboot is necessary to ensure the printer kthreads finish their printing so that the system can cleanly transition back to direct printing via the console_lock in order to reliably push out the final shutdown/reboot messages. @printk_kthreads_running is cleared before shutting down the individual kthreads. The kthread uses a new mandatory write_thread() callback that is called with both device_lock() and the console context acquired. The console ownership handling is necessary for synchronization against write_atomic() which is synchronized only via the console context ownership. The device_lock() serializes acquiring the console context with NBCON_PRIO_NORMAL. It is needed in case the device_lock() does not disable preemption. It prevents the following race: CPU0 CPU1 [ task A ] nbcon_context_try_acquire() # success with NORMAL prio # .unsafe == false; // safe for takeover [ schedule: task A -> B ] WARN_ON() nbcon_atomic_flush_pending() nbcon_context_try_acquire() # success with EMERGENCY prio # flushing nbcon_context_release() # HERE: con->nbcon_state is free # to take by anyone !!! 
nbcon_context_try_acquire() # success with NORMAL prio [ task B ] [ schedule: task B -> A ] nbcon_enter_unsafe() nbcon_context_can_proceed() BUG: nbcon_context_can_proceed() returns "true" because the console is owned by a context on CPU0 with NBCON_PRIO_NORMAL. But it should return "false". The console is owned by a context from task B and we do the check in a context from task A. Note that with these changes, the printer kthreads do not yet take over full responsibility for nbcon printing during normal operation. These changes only focus on the lifecycle of the kthreads. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-7-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 88050d30a9cc..788ce9c829f6 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include @@ -324,6 +326,9 @@ struct nbcon_write_context { * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations * @pbufs: Pointer to nbcon private buffer + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; @@ -377,6 +382,37 @@ struct console { */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + /** + * @write_thread: + * + * NBCON callback to write out text in task context. + * + * This callback must be called only in task context with both + * device_lock() and the nbcon console acquired with + * NBCON_PRIO_NORMAL. + * + * The same rules for console ownership verification and unsafe + * sections handling applies as with write_atomic(). + * + * The console ownership handling is necessary for synchronization + * against write_atomic() which is synchronized only via the context. + * + * The device_lock() provides the primary serialization for operations + * on the device. It might be as relaxed (mutex)[*] or as tight + * (disabled preemption and interrupts) as needed. It allows + * the kthread to operate in the least restrictive mode[**]. + * + * [*] Standalone nbcon_context_try_acquire() is not safe with + * the preemption enabled, see nbcon_owner_matches(). But it + * can be safe when always called in the preemptive context + * under the device_lock(). + * + * [**] The device_lock() makes sure that nbcon_context_try_acquire() + * would never need to spin which is important especially with + * PREEMPT_RT. 
+ */ + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); + /** * @device_lock: * @@ -423,7 +459,11 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + struct printk_buffers *pbufs; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 5102981d5e2a5a7a12424b5d26c7524ac2899898 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 4 Sep 2024 14:11:30 +0206 Subject: printk: nbcon: Show replay message on takeover An emergency or panic context can takeover console ownership while the current owner was printing a printk message. The atomic printer will re-print the message that the previous owner was printing. However, this can look confusing to the user and may even seem as though a message was lost. [3430014.1 [3430014.181123] usb 1-2: Product: USB Audio Add a new field @nbcon_prev_seq to struct console to track the sequence number to print that was assigned to the previous console owner. If this matches the sequence number to print that the current owner is assigned, then a takeover must have occurred. In this case, print an additional message to inform the user that the previous message is being printed again. [3430014.1 ** replaying previous printk message ** [3430014.181123] usb 1-2: Product: USB Audio Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240904120536.115780-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 788ce9c829f6..eba367bf605d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -325,6 +325,7 @@ struct nbcon_write_context { * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations + * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer * @kthread: Printer kthread for this console * @rcuwait: RCU-safe wait object for @kthread waking @@ -459,6 +460,7 @@ struct console { atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; + atomic_long_t __private nbcon_prev_seq; struct printk_buffers *pbufs; struct task_struct *kthread; -- cgit v1.2.3 From a4d398a573d0f0c98c39d4af1866a3edaec4959c Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 2 Sep 2024 07:43:23 +0100 Subject: ARM: 9416/1: amba: make amba_bustype constant Since commit d492cc2573a0 ("driver core: device.h: make struct bus_type a const *"), the driver core can properly handle constant struct bus_type, move the amba_bustype variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. 
Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Kunwu Chan Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 958a55bcc708..dda2f3ea89cb 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -105,7 +105,7 @@ enum amba_vendor { AMBA_VENDOR_LSI = 0xb6, }; -extern struct bus_type amba_bustype; +extern const struct bus_type amba_bustype; #define to_amba_device(d) container_of_const(d, struct amba_device, dev) -- cgit v1.2.3 From 593377036e50de89132bc1222800174fde0780ec Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Fri, 10 May 2024 23:05:56 -0300 Subject: kvm: Note an RCU quiescent state on guest exit As of today, KVM notes a quiescent state only in guest entry, which is good as it avoids the guest being interrupted for current RCU operations. While the guest vcpu runs, it can be interrupted by a timer IRQ that will check for any RCU operations waiting for this CPU. In case there are any of such, it invokes rcu_core() in order to sched-out the current thread and note a quiescent state. This occasional schedule work will introduce tens of microsseconds of latency, which is really bad for vcpus running latency-sensitive applications, such as real-time workloads. So, note a quiescent state in guest exit, so the interrupted guests is able to deal with any pending RCU operations before being required to invoke rcu_core(), and thus avoid the overhead of related scheduler work. Signed-off-by: Leonardo Bras Acked-by: Paul E. McKenney Acked-by: Sean Christopherson Message-ID: <20240511020557.1198200-1-leobras@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 6 ++++-- include/linux/kvm_host.h | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6e76b9dba00e..8a78fabeafc3 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -80,10 +80,12 @@ static __always_inline bool context_tracking_guest_enter(void) return context_tracking_enabled_this_cpu(); } -static __always_inline void context_tracking_guest_exit(void) +static __always_inline bool context_tracking_guest_exit(void) { if (context_tracking_enabled()) __ct_user_exit(CONTEXT_GUEST); + + return context_tracking_enabled_this_cpu(); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) @@ -98,7 +100,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } -static __always_inline void context_tracking_guest_exit(void) { } +static __always_inline bool context_tracking_guest_exit(void) { return false; } #define CT_WARN_ON(cond) do { } while (0) #endif /* !CONFIG_CONTEXT_TRACKING_USER */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..0d5125a3e31a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -485,7 +485,15 @@ static __always_inline void guest_state_enter_irqoff(void) */ static __always_inline void guest_context_exit_irqoff(void) { - context_tracking_guest_exit(); + /* + * Guest mode is treated as a quiescent state, see + * guest_context_enter_irqoff() for more details. 
+ */ + if (!context_tracking_guest_exit()) { + instrumentation_begin(); + rcu_virt_note_context_switch(); + instrumentation_end(); + } } /* -- cgit v1.2.3 From 2097154a10c6ee78be8796411e5d0ad81ee06ed6 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:12 +0200 Subject: namespace: introduce SB_I_NOIDMAP flag Right now we determine if filesystem support vfs idmappings or not basing on the FS_ALLOW_IDMAP flag presence. This "static" way works perfecly well for local filesystems like ext4, xfs, btrfs, etc. But for network-like filesystems like fuse, cephfs this approach is not ideal, because sometimes proper support of vfs idmaps requires some extensions for the on-wire protocol, which implies that changes have to be made not only in the Linux kernel code but also in the 3rd party components like libfuse, cephfs MDS server and so on. We have seen that issue during our work on cephfs idmapped mounts [1] with Christian, but right now I'm working on the idmapped mounts support for fuse/virtiofs and I think that it is a right time for this extension. [1] 5ccd8530dd7 ("ceph: handle idmapped mounts in create_request_message()") Suggested-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..6ff547ef21f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1189,6 +1189,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ +#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ /* Possible states of 'frozen' field */ enum { -- cgit v1.2.3 From aa16880d9f13c6490e80ad614402c8a6fe6f3efa Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:13 +0200 Subject: fuse: add basic infrastructure to support idmappings Add some preparational changes in fuse_get_req/fuse_force_creds to handle idmappings. Miklos suggested [1], [2] to change the meaning of in.h.uid/in.h.gid fields when daemon declares support for idmapped mounts. In a new semantic, we fill uid/gid values in fuse header with a id-mapped caller uid/gid (for requests which create new inodes), for all the rest cases we just send -1 to userspace. No functional changes intended. 
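A hedged sketch of the semantics described above, not the actual fuse code: the helper name, its shape, and the use of the connection's user namespace are assumptions of this illustration; only FUSE_INVALID_UIDGID and the "mapped ids only for inode-creating requests" rule come from this series.

  static void fuse_fill_header_creds(struct fuse_conn *fc, struct mnt_idmap *idmap,
                                     struct fuse_in_header *hdr, bool creates_inode)
  {
          if (creates_inode) {
                  /* id-mapped caller credentials for inode-creating requests */
                  hdr->uid = from_kuid(fc->user_ns, mapped_fsuid(idmap, fc->user_ns));
                  hdr->gid = from_kgid(fc->user_ns, mapped_fsgid(idmap, fc->user_ns));
          } else {
                  /* everything else reports -1 once idmap support is negotiated */
                  hdr->uid = FUSE_INVALID_UIDGID;
                  hdr->gid = FUSE_INVALID_UIDGID;
          }
  }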
Link: https://lore.kernel.org/all/CAJfpegsVY97_5mHSc06mSw79FehFWtoXT=hhTUK_E-Yhr7OAuQ@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/CAJfpegtHQsEUuFq1k4ZbTD3E1h-GsrN3PWyv7X8cg6sfU_W2Yw@mail.gmail.com/ [2] Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d08b99d60f6f..2ccf38181df2 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -984,6 +984,8 @@ struct fuse_fallocate_in { */ #define FUSE_UNIQUE_RESEND (1ULL << 63) +#define FUSE_INVALID_UIDGID ((uint32_t)(-1)) + struct fuse_in_header { uint32_t len; uint32_t opcode; -- cgit v1.2.3 From 16e1503eaf329129170e4e7a078aee17686967a5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 3 Sep 2024 17:16:25 +0200 Subject: fuse: allow idmapped mounts Now we have everything in place and we can allow idmapped mounts by setting the FS_ALLOW_IDMAP flag. Notice that real availability of idmapped mounts will depend on the fuse daemon. Fuse daemon have to set FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply. To discuss: - we enable idmapped mounts support only if "default_permissions" mode is enabled, because otherwise we would need to deal with UID/GID mappings in the userspace side OR provide the userspace with idmapped req->in.h.uid/req->in.h.gid values which is not something that we probably want to. Idmapped mounts philosophy is not about faking caller uid/gid. Some extra links and examples: - libfuse support https://github.com/mihalicyn/libfuse/commits/idmap_support - fuse-overlayfs support: https://github.com/mihalicyn/fuse-overlayfs/commits/idmap_support - cephfs-fuse conversion example https://github.com/mihalicyn/ceph/commits/fuse_idmap - glusterfs conversion example https://github.com/mihalicyn/glusterfs/commits/fuse_idmap Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2ccf38181df2..f1e99458e29e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -217,6 +217,9 @@ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + * + * 7.41 + * - add FUSE_ALLOW_IDMAP */ #ifndef _LINUX_FUSE_H @@ -252,7 +255,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 40 +#define FUSE_KERNEL_MINOR_VERSION 41 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -421,6 +424,7 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -466,6 +470,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +#define FUSE_ALLOW_IDMAP (1ULL << 40) /** * CUSE INIT request/reply flags @@ -984,6 +989,19 @@ struct fuse_fallocate_in { */ #define FUSE_UNIQUE_RESEND (1ULL << 63) +/** + * This value will be set by the kernel to + * (struct fuse_in_header).{uid,gid} 
fields in + * case when: + * - fuse daemon enabled FUSE_ALLOW_IDMAP + * - idmapping information is not available and uid/gid + * can not be mapped in accordance with an idmapping. + * + * Note: an idmapping information always available + * for inode creation operations like: + * FUSE_MKNOD, FUSE_SYMLINK, FUSE_MKDIR, FUSE_TMPFILE, + * FUSE_CREATE and FUSE_RENAME2 (with RENAME_WHITEOUT). + */ #define FUSE_INVALID_UIDGID ((uint32_t)(-1)) struct fuse_in_header { -- cgit v1.2.3 From 071f24ad28cdcdfa544fdb79b1b1d2b423717a11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:54 -0700 Subject: KVM: Rename arch hooks related to per-CPU virtualization enabling Rename the per-CPU hooks used to enable virtualization in hardware to align with the KVM-wide helpers in kvm_main.c, and to better capture that the callbacks are invoked on every online CPU. No functional change intended. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20240830043600.127750-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..4ceecba64f93 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,8 +1521,8 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b67107a251b01161956892352d53fb122346eda1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:58 -0700 Subject: KVM: Add arch hooks for enabling/disabling virtualization Add arch hooks that are invoked when KVM enables/disable virtualization. x86 will use the hooks to register an "emergency disable" callback, which is essentially an x86-specific shutdown notifier that is used when the kernel is doing an emergency reboot/shutdown/kexec. Add comments for the declarations to help arch code understand exactly when the callbacks are invoked. Alternatively, the APIs themselves could communicate most of the same info, but kvm_arch_pre_enable_virtualization() and kvm_arch_post_disable_virtualization() are a bit cumbersome, and make it a bit less obvious that they are intended to be implemented as a pair. Reviewed-by: Chao Gao Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ceecba64f93..dfd6ad66efcc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1521,6 +1521,20 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +/* + * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under + * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of + * kvm_usage_count, i.e. 
at the beginning of the generic hardware enabling + * sequence, and at the end of the generic hardware disabling sequence. + */ +void kvm_arch_enable_virtualization(void); +void kvm_arch_disable_virtualization(void); +/* + * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to + * do the actual twiddling of hardware bits. The hooks are called on all + * online CPUs when KVM enables/disabled virtualization, and on a single CPU + * when that CPU is onlined/offlined (including for Resume/Suspend). + */ int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif -- cgit v1.2.3 From 940ce73bdec5a020fec058ba947d2bf627462c53 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 4 Sep 2024 11:08:44 -0700 Subject: bpf: Remove the insn_buf array stack usage from the inline_bpf_loop() This patch removes the insn_buf array stack usage from the inline_bpf_loop(). Instead, the env->insn_buf is used. The usage in inline_bpf_loop() needs more than 16 insn, so the INSN_BUF_SIZE needs to be increased from 16 to 32. The compiler stack size warning on the verifier is gone after this change. Cc: Eduard Zingerman Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240904180847.56947-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2e20207315a9..8458632824a4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -24,7 +24,7 @@ */ #define TMP_STR_BUF_LEN 320 /* Patch buffer size */ -#define INSN_BUF_SIZE 16 +#define INSN_BUF_SIZE 32 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that -- cgit v1.2.3 From a8532fac7b5d27b8d62008a89593dccb6f9786ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 30 Aug 2024 22:02:34 -1000 Subject: sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable During scx_ops_enable(), SCX needs to invoke the sleepable ops.init_task() on every task. To do this, it does get_task_struct() on each iterated task, drop the lock and then call ops.init_task(). However, a TASK_DEAD task may already have lost all its usage count and be waiting for RCU grace period to be freed. If get_task_struct() is called on such task, use-after-free can happen. To avoid such situations, scx_ops_enable() skips initialization of TASK_DEAD tasks, which seems safe as they are never going to be scheduled again. Unfortunately, a racing sched_setscheduler(2) can grab the task before the task is unhashed and then continue to e.g. move the task from RT to SCX after TASK_DEAD is set and ops_enable skipped the task. As the task hasn't gone through scx_ops_init_task(), scx_ops_enable_task() called from switching_to_scx() triggers the following warning: sched_ext: Invalid task state transition 0 -> 3 for stress-ng-race-[2872] WARNING: CPU: 6 PID: 2367 at kernel/sched/ext.c:3327 scx_ops_enable_task+0x18f/0x1f0 ... RIP: 0010:scx_ops_enable_task+0x18f/0x1f0 ... switching_to_scx+0x13/0xa0 __sched_setscheduler+0x84e/0xa50 do_sched_setscheduler+0x104/0x1c0 __x64_sys_sched_setscheduler+0x18/0x30 do_syscall_64+0x7b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e As in the ops_disable path, it just doesn't seem like a good idea to leave any task in an inconsistent state, even when the task is dead. 
The root cause is ops_enable not being able to tell reliably whether a task is truly dead (no one else is looking at it and it's about to be freed) and was testing TASK_DEAD instead. Fix it by testing the task's usage count directly. - ops_init no longer ignores TASK_DEAD tasks. As now all users iterate all tasks, @include_dead is removed from scx_task_iter_next_locked() along with dead task filtering. - tryget_task_struct() is added. Tasks are skipped iff tryget_task_struct() fails. Signed-off-by: Tejun Heo Cc: David Vernet Cc: Peter Zijlstra --- include/linux/sched/task.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4df2f9055587..0f2aeb37bbb0 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -120,6 +120,11 @@ static inline struct task_struct *get_task_struct(struct task_struct *t) return t; } +static inline struct task_struct *tryget_task_struct(struct task_struct *t) +{ + return refcount_inc_not_zero(&t->usage) ? t : NULL; +} + extern void __put_task_struct(struct task_struct *t); extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); -- cgit v1.2.3 From 8195136669661fdfe54e9a8923c33b31c92fc1da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Sep 2024 10:24:59 -1000 Subject: sched_ext: Add cgroup support Add sched_ext_ops operations to init/exit cgroups, and track task migrations and config changes. A BPF scheduler may not implement or implement only subset of cgroup features. The implemented features can be indicated using %SCX_OPS_HAS_CGOUP_* flags. If cgroup configuration makes use of features that are not implemented, a warning is triggered. While a BPF scheduler is being enabled and disabled, relevant cgroup operations are locked out using scx_cgroup_rwsem. This avoids situations like task prep taking place while the task is being moved across cgroups, making things easier for BPF schedulers. v7: - cgroup interface file visibility toggling is dropped in favor just warning messages. Dynamically changing interface visiblity caused more confusion than helping. v6: - Updated to reflect the removal of SCX_KF_SLEEPABLE. - Updated to use CONFIG_GROUP_SCHED_WEIGHT and fixes for !CONFIG_FAIR_GROUP_SCHED && CONFIG_EXT_GROUP_SCHED. v5: - Flipped the locking order between scx_cgroup_rwsem and cpus_read_lock() to avoid locking order conflict w/ cpuset. Better documentation around locking. - sched_move_task() takes an early exit if the source and destination are identical. This triggered the warning in scx_cgroup_can_attach() as it left p->scx.cgrp_moving_from uncleared. Updated the cgroup migration path so that ops.cgroup_prep_move() is skipped for identity migrations so that its invocations always match ops.cgroup_move() one-to-one. v4: - Example schedulers moved into their own patches. - Fix build failure when !CONFIG_CGROUP_SCHED, reported by Andrea Righi. v3: - Make scx_example_pair switch all tasks by default. - Convert to BPF inline iterators. - scx_bpf_task_cgroup() is added to determine the current cgroup from CPU controller's POV. This allows BPF schedulers to accurately track CPU cgroup membership. - scx_example_flatcg added. This demonstrates flattened hierarchy implementation of CPU cgroup control and shows significant performance improvement when cgroups which are nested multiple levels are under competition. v2: - Build fixes for different CONFIG combinations. 
Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Reported-by: kernel test robot Cc: Andrea Righi --- include/linux/sched/ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index db2a266113ac..dc646cca4fcf 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -188,6 +188,9 @@ struct sched_ext_entity { bool disallow; /* reject switching into SCX */ /* cold fields */ +#ifdef CONFIG_EXT_GROUP_SCHED + struct cgroup *cgrp_moving_from; +#endif /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; }; -- cgit v1.2.3 From 4e893545ef8712d25f3176790ebb95beb073637e Mon Sep 17 00:00:00 2001 From: Mariusz Tkaczyk Date: Wed, 4 Sep 2024 12:48:47 +0200 Subject: PCI/NPEM: Add Native PCIe Enclosure Management support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Native PCIe Enclosure Management (NPEM, PCIe r6.1 sec 6.28) allows managing LEDs in storage enclosures. NPEM is indication oriented and it does not give direct access to LEDs. Although each indication *could* represent an individual LED, multiple indications could also be represented as a single, multi-color LED or a single LED blinking in a specific interval. The specification leaves that open. Each enabled indication (capability register bit on) is represented as a ledclass_dev which can be controlled through sysfs. For every ledclass device only 2 brightness states are allowed: LED_ON (1) or LED_OFF (0). This corresponds to the NPEM control register (Indication bit on/off). Ledclass devices appear in sysfs as child devices (subdirectory) of PCI device which has an NPEM Extended Capability and indication is enabled in NPEM capability register. For example, these are LEDs created for pcieport "10000:02:05.0" on my setup: leds/ ├── 10000:02:05.0:enclosure:fail ├── 10000:02:05.0:enclosure:locate ├── 10000:02:05.0:enclosure:ok └── 10000:02:05.0:enclosure:rebuild They can be also found in "/sys/class/leds" directory. The parent PCIe device domain/bus/device/function address is used to guarantee uniqueness across leds subsystem. To enable/disable a "fail" indication, the "brightness" file can be edited: echo 1 > ./leds/10000:02:05.0:enclosure:fail/brightness echo 0 > ./leds/10000:02:05.0:enclosure:fail/brightness PCIe r6.1, sec 7.9.19.2 defines the possible indications. Multiple indications for same parent PCIe device can conflict and hardware may update them when processing new request. To avoid issues, driver refresh all indications by reading back control register. This driver expects to be the exclusive NPEM extended capability manager. It waits up to 1 second after imposing new request, it doesn't verify if controller is busy before write, and it assumes the mutex lock gives protection from concurrent updates. If _DSM LED management is available, we assume the platform may be using NPEM for its own purposes (see PCI Firmware Spec r3.3 sec 4.7), so the driver does not use NPEM. A future patch will add _DSM support; an info message notes whether NPEM or _DSM is being used. NPEM is a PCIe extended capability so it should be registered in pcie_init_capabilities() but it is not possible due to LED dependency. The parent pci_device must be added earlier for led_classdev_register() to be successful. NPEM does not require configuration on kernel side, so it is safe to register LED devices later. 
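A hedged sketch, not the driver itself, of what driving an indication through the new register definitions looks like; the completion polling and locking described above are omitted, and the function name is illustrative:

  static int npem_locate_on(struct pci_dev *pdev)
  {
          u16 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_NPEM);
          u32 ctrl;

          if (!pos)
                  return -ENODEV;

          /* Enable NPEM and request the Locate indication in one write. */
          pci_read_config_dword(pdev, pos + PCI_NPEM_CTRL, &ctrl);
          ctrl |= PCI_NPEM_CTRL_ENABLE | PCI_NPEM_IND_LOCATE;
          return pci_write_config_dword(pdev, pos + PCI_NPEM_CTRL, ctrl);
  }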
Link: https://lore.kernel.org/r/20240904104848.23480-3-mariusz.tkaczyk@linux.intel.com Suggested-by: Lukas Wunner Signed-off-by: Mariusz Tkaczyk Signed-off-by: Bjorn Helgaas Tested-by: Stuart Hayes Reviewed-by: Christoph Hellwig Reviewed-by: Ilpo Järvinen --- include/linux/pci.h | 3 +++ include/uapi/linux/pci_regs.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..c9db853e269f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -516,6 +516,9 @@ struct pci_dev { #endif #ifdef CONFIG_PCI_DOE struct xarray doe_mbs; /* Data Object Exchange mailboxes */ +#endif +#ifdef CONFIG_PCI_NPEM + struct npem *npem; /* Native PCIe Enclosure Management */ #endif u16 acs_cap; /* ACS Capability offset */ phys_addr_t rom; /* Physical address if not from BAR */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 94c00996e633..9751b413f3d6 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -740,6 +740,7 @@ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ +#define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE @@ -1121,6 +1122,40 @@ #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Native PCIe Enclosure Management */ +#define PCI_NPEM_CAP 0x04 /* NPEM capability register */ +#define PCI_NPEM_CAP_CAPABLE 0x00000001 /* NPEM Capable */ + +#define PCI_NPEM_CTRL 0x08 /* NPEM control register */ +#define PCI_NPEM_CTRL_ENABLE 0x00000001 /* NPEM Enable */ + +/* + * Native PCIe Enclosure Management indication bits and Reset command bit + * are corresponding for capability and control registers. 
+ */ +#define PCI_NPEM_CMD_RESET 0x00000002 /* Reset Command */ +#define PCI_NPEM_IND_OK 0x00000004 /* OK */ +#define PCI_NPEM_IND_LOCATE 0x00000008 /* Locate */ +#define PCI_NPEM_IND_FAIL 0x00000010 /* Fail */ +#define PCI_NPEM_IND_REBUILD 0x00000020 /* Rebuild */ +#define PCI_NPEM_IND_PFA 0x00000040 /* Predicted Failure Analysis */ +#define PCI_NPEM_IND_HOTSPARE 0x00000080 /* Hot Spare */ +#define PCI_NPEM_IND_ICA 0x00000100 /* In Critical Array */ +#define PCI_NPEM_IND_IFA 0x00000200 /* In Failed Array */ +#define PCI_NPEM_IND_IDT 0x00000400 /* Device Type */ +#define PCI_NPEM_IND_DISABLED 0x00000800 /* Disabled */ +#define PCI_NPEM_IND_SPEC_0 0x01000000 +#define PCI_NPEM_IND_SPEC_1 0x02000000 +#define PCI_NPEM_IND_SPEC_2 0x04000000 +#define PCI_NPEM_IND_SPEC_3 0x08000000 +#define PCI_NPEM_IND_SPEC_4 0x10000000 +#define PCI_NPEM_IND_SPEC_5 0x20000000 +#define PCI_NPEM_IND_SPEC_6 0x40000000 +#define PCI_NPEM_IND_SPEC_7 0x80000000 + +#define PCI_NPEM_STATUS 0x0c /* NPEM status register */ +#define PCI_NPEM_STATUS_CC 0x00000001 /* Command Completed */ + /* Data Object Exchange */ #define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */ #define PCI_DOE_CAP_INT_SUP 0x00000001 /* Interrupt Support */ -- cgit v1.2.3 From 1705341485ff1eec097dfa26891d03afe5907e16 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Sun, 1 Sep 2024 20:45:34 -0700 Subject: net: mana: Improve mana_set_channels() in low mem conditions The mana_set_channels() function requires detaching the mana driver and reattaching it with changed channel values. During this operation if the system is low on memory, the reattach might fail, causing the network device being down. To avoid this we pre-allocate buffers at the beginning of set operation, to prevent complete network loss Signed-off-by: Shradha Gupta Reviewed-by: Gerhard Engleder Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/1725248734-21760-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/mana.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 1f869624811d..dfbf78d4e557 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -488,7 +488,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); -int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu); +int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; -- cgit v1.2.3 From 1083d733eb2624837753046924a9248555a1bfbf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 3 Sep 2024 16:35:54 +0300 Subject: ipv4: Fix user space build failure due to header change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RT_TOS() from include/uapi/linux/in_route.h is defined using IPTOS_TOS_MASK from include/uapi/linux/ip.h. 
This is problematic for files such as include/net/ip_fib.h that want to use RT_TOS() as without including both header files kernel compilation fails: In file included from ./include/net/ip_fib.h:25, from ./include/net/route.h:27, from ./include/net/lwtunnel.h:9, from net/core/dst.c:24: ./include/net/ip_fib.h: In function ‘fib_dscp_masked_match’: ./include/uapi/linux/in_route.h:31:32: error: ‘IPTOS_TOS_MASK’ undeclared (first use in this function) 31 | #define RT_TOS(tos) ((tos)&IPTOS_TOS_MASK) | ^~~~~~~~~~~~~~ ./include/net/ip_fib.h:440:45: note: in expansion of macro ‘RT_TOS’ 440 | return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); Therefore, cited commit changed linux/in_route.h to include linux/ip.h. However, as reported by David, this breaks iproute2 compilation due overlapping definitions between linux/ip.h and /usr/include/netinet/ip.h: In file included from ../include/uapi/linux/in_route.h:5, from iproute.c:19: ../include/uapi/linux/ip.h:25:9: warning: "IPTOS_TOS" redefined 25 | #define IPTOS_TOS(tos) ((tos)&IPTOS_TOS_MASK) | ^~~~~~~~~ In file included from iproute.c:17: /usr/include/netinet/ip.h:222:9: note: this is the location of the previous definition 222 | #define IPTOS_TOS(tos) ((tos) & IPTOS_TOS_MASK) Fix by changing include/net/ip_fib.h to include linux/ip.h. Note that usage of RT_TOS() should not spread further in the kernel due to recent work in this area. Fixes: 1fa3314c14c6 ("ipv4: Centralize TOS matching") Reported-by: David Ahern Closes: https://lore.kernel.org/netdev/2f5146ff-507d-4cab-a195-b28c0c9e654e@kernel.org/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20240903133554.2807343-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 1 + include/uapi/linux/in_route.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 269ec10f63e4..967e4dc555fa 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include #include struct fib_config { diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h index 10bdd7e7107f..0cc2c23b47f8 100644 --- a/include/uapi/linux/in_route.h +++ b/include/uapi/linux/in_route.h @@ -2,8 +2,6 @@ #ifndef _LINUX_IN_ROUTE_H #define _LINUX_IN_ROUTE_H -#include - /* IPv4 routing cache flags */ #define RTCF_DEAD RTNH_F_DEAD -- cgit v1.2.3 From 8f52de0077ba3bf41e5d53d67a185700f41efce7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jun 2024 11:33:01 +0100 Subject: netfs: Reduce number of conditional branches in netfs_perform_write() Reduce the number of conditional branches in netfs_perform_write() by merging in netfs_how_to_modify() and then creating a separate if-statement for each way we might modify a folio. Note that this means replicating the data copy in each path. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-6-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 606b4a0f92da..a4fd5dea52f4 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -129,7 +129,6 @@ E_(netfs_sreq_trace_put_terminated, "PUT TERM ") #define netfs_folio_traces \ - /* The first few correspond to enum netfs_how_to_modify */ \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ @@ -139,7 +138,6 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ - /* The rest are for writeback */ \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ -- cgit v1.2.3 From 73425800ac948cb78063b897a0c345d788f3594d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jun 2024 11:26:24 +0100 Subject: netfs, cifs: Move CIFS_INO_MODIFIED_ATTR to netfs_inode Move CIFS_INO_MODIFIED_ATTR to netfs_inode as NETFS_ICTX_MODIFIED_ATTR and then make netfs_perform_write() set it. This means that cifs doesn't need to implement the ->post_modify() hook. Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-7-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c47443e7a97e..71c748c53a1f 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,6 +73,7 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ }; /* -- cgit v1.2.3 From 52d55922e0f1db1f580c9f91c174d2392bfad481 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Jun 2024 09:02:58 +0100 Subject: netfs: Move max_len/max_nr_segs from netfs_io_subrequest to netfs_io_stream Move max_len/max_nr_segs from struct netfs_io_subrequest to struct netfs_io_stream as we only issue one subreq at a time and then don't need these values again for that subreq unless and until we have to retry it - in which case we want to renegotiate them. 
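A minimal sketch of the adjustment a filesystem makes after this change, assuming a prepare-write style hook; the io_streams[] indexing and the 256 KiB cap are assumptions of this sketch, while the stream fields come from the hunk below:

  static void myfs_prepare_write(struct netfs_io_subrequest *subreq)
  {
          struct netfs_io_stream *stream =
                  &subreq->rreq->io_streams[subreq->stream_nr];

          /* Limits now live on the stream and are renegotiated on retry. */
          stream->sreq_max_len  = 256 * 1024;     /* e.g. the server's wsize */
          stream->sreq_max_segs = 0;              /* no iterator segment limit */
  }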
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-8-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 71c748c53a1f..eea5ea2842f6 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -134,6 +134,8 @@ static inline struct netfs_group *netfs_folio_group(struct folio *folio) struct netfs_io_stream { /* Submission tracking */ struct netfs_io_subrequest *construct; /* Op being constructed */ + size_t sreq_max_len; /* Maximum size of a subrequest */ + unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ unsigned int submit_off; /* Folio offset we're submitting from */ unsigned int submit_len; /* Amount of data left to submit */ unsigned int submit_max_len; /* Amount I/O can be rounded up to */ @@ -177,14 +179,12 @@ struct netfs_io_subrequest { struct list_head rreq_link; /* Link in rreq->subrequests */ struct iov_iter io_iter; /* Iterator for this subrequest */ unsigned long long start; /* Where to start the I/O */ - size_t max_len; /* Maximum size of the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ unsigned int nr_segs; /* Number of segs in io_iter */ - unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ unsigned long flags; -- cgit v1.2.3 From 51e9a86a4f75c6dc8531407b2ab2ccc4ad137e5a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:45:46 +0100 Subject: netfs: Reserve netfs_sreq_source 0 as unset/unknown Reserve the 0-valued netfs_sreq_source to mean unset or unknown so that it can be seen in the trace as such rather than appearing as download-from-server when it's going to get switched to something else. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-9-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + include/trace/events/netfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index eea5ea2842f6..37f8ce9c0284 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -43,6 +43,7 @@ static inline void folio_start_private_2(struct folio *folio) #define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ enum netfs_io_source { + NETFS_SOURCE_UNKNOWN, NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index a4fd5dea52f4..f4105b8e5894 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -60,6 +60,7 @@ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ + EM(NETFS_SOURCE_UNKNOWN, "----") \ EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ EM(NETFS_READ_FROM_CACHE, "READ") \ -- cgit v1.2.3 From c57de2a9259d7dc18a7a425fca91c77502263d8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:43:38 +0100 Subject: netfs: Remove NETFS_COPY_TO_CACHE Remove NETFS_COPY_TO_CACHE as it isn't used anymore. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-10-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 3 +-- include/trace/events/netfs.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 37f8ce9c0284..63ab2c775a21 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -207,11 +207,10 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ - NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ + NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ - NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_DIO_WRITE, /* This is a direct I/O write */ nr__netfs_io_origin } __mode(byte); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f4105b8e5894..47cd11aaccac 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -34,11 +34,10 @@ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ EM(NETFS_READ_FOR_WRITE, "RW") \ - EM(NETFS_COPY_TO_CACHE, "CC") \ + EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - EM(NETFS_DIO_READ, "DR") \ E_(NETFS_DIO_WRITE, "DW") #define netfs_rreq_traces \ -- cgit v1.2.3 From 1c3e34bf8802b8b17d0c2067c43bb49b7e83885c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 5 Jul 2024 23:14:51 +0200 Subject: pwm: Make info in traces about affected pwm more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hashed pointer isn't useful to identify the pwm device. 
Instead store and emit chipid and hwpwm. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240705211452.1157967-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/trace/events/pwm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h index 12b35e4ff917..8022701c446d 100644 --- a/include/trace/events/pwm.h +++ b/include/trace/events/pwm.h @@ -15,7 +15,8 @@ DECLARE_EVENT_CLASS(pwm, TP_ARGS(pwm, state, err), TP_STRUCT__entry( - __field(struct pwm_device *, pwm) + __field(unsigned int, chipid) + __field(unsigned int, hwpwm) __field(u64, period) __field(u64, duty_cycle) __field(enum pwm_polarity, polarity) @@ -24,7 +25,8 @@ DECLARE_EVENT_CLASS(pwm, ), TP_fast_assign( - __entry->pwm = pwm; + __entry->chipid = pwm->chip->id; + __entry->hwpwm = pwm->hwpwm; __entry->period = state->period; __entry->duty_cycle = state->duty_cycle; __entry->polarity = state->polarity; @@ -32,8 +34,8 @@ DECLARE_EVENT_CLASS(pwm, __entry->err = err; ), - TP_printk("%p: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", - __entry->pwm, __entry->period, __entry->duty_cycle, + TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", + __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle, __entry->polarity, __entry->enabled, __entry->err) ); -- cgit v1.2.3 From f9ecc2febf6fd6ad53208a1c0e1b5066ee65dd8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Jul 2024 19:18:20 +0200 Subject: pwm: Don't export pwm_capture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only a single caller of this function, and that's in drivers/pwm/core.c itself. So don't export the function. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240712171821.1470833-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f8c2dc12dbd3..8acd60b53f58 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -394,9 +394,6 @@ static inline bool pwm_might_sleep(struct pwm_device *pwm) } /* PWM provider APIs */ -int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, - unsigned long timeout); - void pwmchip_put(struct pwm_chip *chip); struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); @@ -462,13 +459,6 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline void pwmchip_put(struct pwm_chip *chip) { } -- cgit v1.2.3 From b4fef22c2fb97fa204f0c99c7c7f1c6b422ef0aa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 28 Aug 2024 20:19:42 +1000 Subject: uapi: explain how per-syscall AT_* flags should be allocated Unfortunately, the way we have gone about adding new AT_* flags has been a little messy. In the beginning, all of the AT_* flags had generic meanings and so it made sense to share the flag bits indiscriminately. However, we inevitably ran into syscalls that needed their own syscall-specific flags. 
Due to the lack of a planned-out policy, we ended up with the following situations: * Existing syscalls adding new features tended to use new AT_* bits, with some effort taken to try to re-use bits for flags that were so obviously syscall-specific that they only make sense for a single syscall (such as the AT_EACCESS/AT_REMOVEDIR/AT_HANDLE_FID triplet). Given the constraints of bitflags, this works well in practice, but ideally (to avoid future confusion) we would plan ahead and define a set of "per-syscall bits" ahead of time so that when allocating new bits we don't end up with a complete mish-mash of which bits are supposed to be per-syscall and which aren't. * New syscalls dealt with this in several ways: - Some syscalls (like renameat2(2), move_mount(2), fsopen(2), and fspick(2)) created their own separate flag spaces that have no overlap with the AT_* flags. Most of these ended up allocating their bits sequentially. In the case of move_mount(2) and fspick(2), several flags have identical meanings to AT_* flags but were allocated in their own flag space. This makes sense for syscalls that will never share AT_* flags, but for some syscalls this leads to duplication with AT_* flags in a way that could cause confusion (if renameat2(2) grew a RENAME_EMPTY_PATH it seems likely that users could mistake it for AT_EMPTY_PATH since it is an *at(2) syscall). - Some syscalls unfortunately ended up both creating their own flag space and using bits from other flag spaces. The most obvious example is open_tree(2), where the standard usage ends up using flags from *THREE* separate flag spaces: open_tree(AT_FDCWD, "/foo", OPEN_TREE_CLONE|O_CLOEXEC|AT_RECURSIVE); (Note that O_CLOEXEC is also platform-specific, so several future OPEN_TREE_* bits are also made unusable in one fell swoop.) It's not entirely clear to me what the "right" choice is for new syscalls. Just saying that all future VFS syscalls should use AT_* flags doesn't seem practical. openat2(2) has RESOLVE_* flags (many of which don't make much sense to burn generic AT_* flags for) and move_mount(2) has separate AT_*-like flags for both the source and target so separate flags are needed anyway (though it seems possible that renameat2(2) could grow *_EMPTY_PATH flags at some point, and it's a bit of a shame they can't be reused). But at least for syscalls that _do_ choose to use AT_* flags, we should explicitly state the policy that 0x2ff is currently intended for per-syscall flags and that new flags should err on the side of overlapping with existing flag bits (so we can extend the scope of generic flags in the future if necessary). And add AT_* aliases for the RENAME_* flags to further cement that renameat2(2) is an *at(2) flag, just with its own per-syscall flags.
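A hedged userspace illustration of the new aliases: AT_RENAME_NOREPLACE is defined below to match the legacy RENAME_NOREPLACE value, so either spelling can be passed to renameat2(2). The renameat2() wrapper needs a reasonably recent glibc; otherwise syscall(2) applies.

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <stdio.h>

  #ifndef AT_RENAME_NOREPLACE
  #define AT_RENAME_NOREPLACE 0x0001      /* value added by this patch */
  #endif

  int main(void)
  {
          /* Fail instead of silently replacing an existing "new". */
          if (renameat2(AT_FDCWD, "old", AT_FDCWD, "new", AT_RENAME_NOREPLACE) < 0)
                  perror("renameat2");
          return 0;
  }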
Suggested-by: Amir Goldstein Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-1-10c2c4c16708@cyphar.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 80 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index e55a3314bcb0..38a6d66d9e88 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,37 +90,69 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value for dirfd used to + indicate openat should use the + current working directory. */ + + +/* Generic flags for the *at(2) family of syscalls. */ + +/* Reserved for per-syscall flags 0xff. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic + links. */ +/* Reserved for per-syscall flags 0x200 */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount + traversal. */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative + pathname to operate on dirfd + directly. */ /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to - * unlinkat. The two functions do completely different things and therefore, - * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to - * faccessat would be undefined behavior and thus treating it equivalent to - * AT_EACCESS is valid undefined behavior. + * These flags are currently statx(2)-specific, but they could be made generic + * in the future and so they should not be used for other per-syscall flags. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + +/* + * Per-syscall flags for the *at(2) family of syscalls. + * + * These are flags that are so syscall-specific that a user passing these flags + * to the wrong syscall is so "clearly wrong" that we can safely call such + * usage "undefined behaviour". + * + * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value. + * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful + * only to unlinkat. The two functions do completely different things and + * therefore, the flags can be allowed to overlap. For example, passing + * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it + * equivalent to AT_EACCESS is valid undefined behavior. + * + * Note for implementers: When picking a new per-syscall AT_* flag, try to + * reuse already existing flags first. This leaves us with as many unused bits + * as possible, so we can use them for generic bits in the future if necessary. + */ + +/* Flags for renameat2(2) (must match legacy RENAME_* flags). 
*/ +#define AT_RENAME_NOREPLACE 0x0001 +#define AT_RENAME_EXCHANGE 0x0002 +#define AT_RENAME_WHITEOUT 0x0004 + +/* Flag for faccessat(2). */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ +/* Flag for unlinkat(2). */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ - -#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ -#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ -#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ -#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ - -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +/* Flags for name_to_handle_at(2). */ +#define AT_HANDLE_FID 0x200 /* File handle is needed to compare + object identity and may not be + usable with open_by_handle_at(2). */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to - compare object identity and may not - be usable to open_by_handle_at(2) */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif -- cgit v1.2.3 From 4356d575ef0f39a3e8e0ce0c40d84ce900ac3b61 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 28 Aug 2024 20:19:43 +1000 Subject: fhandle: expose u64 mount id to name_to_handle_at(2) Now that we provide a unique 64-bit mount ID interface in statx(2), we can now provide a race-free way for name_to_handle_at(2) to provide a file handle and corresponding mount without needing to worry about racing with /proc/mountinfo parsing or having to open a file just to do statx(2). 
While this is not necessary if you are using AT_EMPTY_PATH and don't care about an extra statx(2) call, users that pass full paths into name_to_handle_at(2) need to know which mount the file handle comes from (to make sure they don't try to open_by_handle_at a file handle from a different filesystem) and switching to AT_EMPTY_PATH would require allocating a file for every name_to_handle_at(2) call, turning err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid, AT_HANDLE_MNT_ID_UNIQUE); into int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC); err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH); err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf); mntid = statxbuf.stx_mnt_id; close(fd); Reviewed-by: Jeff Layton Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-2-10c2c4c16708@cyphar.com Reviewed-by: Jan Kara Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/linux/syscalls.h | 2 +- include/uapi/linux/fcntl.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4bcf6754738d..5758104921e6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -870,7 +870,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, #endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, - int __user *mnt_id, int flag); + void __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 38a6d66d9e88..87e2dec79fea 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -152,6 +152,7 @@ #define AT_HANDLE_FID 0x200 /* File handle is needed to compare object identity and may not be usable with open_by_handle_at(2). */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 -- cgit v1.2.3 From 19156263cb1f24128a9ba6ef7340be5cbacc3d22 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 5 Sep 2024 10:14:05 +0300 Subject: dma-mapping: use IOMMU DMA calls for common alloc/free page calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Common alloc and free pages routines are called when IOMMU DMA is used, and internally they call into the DMA ops structure, which is not available for the default IOMMU. This patch adds the necessary checks to call the IOMMU DMA implementation directly. 
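For context, the path that used to crash is the generic page-allocation API that drivers call; a minimal sketch of such a caller (editorial, not from the patch; the device pointer and size are placeholders):

    struct page *page;
    dma_addr_t dma;

    /* On IOMMU-DMA devices without dma_ops this used to oops; the common
     * helper now checks use_dma_iommu(dev) and takes the dma-iommu path. */
    page = dma_alloc_pages(dev, PAGE_SIZE, &dma, DMA_BIDIRECTIONAL, GFP_KERNEL);
    if (!page)
        return -ENOMEM;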
It fixes the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000040 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000000d20bb000 [0000000000000040] pgd=08000000d20c1003 , p4d=08000000d20c1003 , pud=08000000d20c2003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: ipv6 hci_uart venus_core btqca v4l2_mem2mem btrtl qcom_spmi_adc5 sbs_battery btbcm qcom_vadc_common cros_ec_typec videobuf2_v4l2 leds_cros_ec cros_kbd_led_backlight cros_ec_chardev videodev elan_i2c videobuf2_common qcom_stats mc bluetooth coresight_stm stm_core ecdh_generic ecc pwrseq_core panel_edp icc_bwmon ath10k_snoc ath10k_core ath mac80211 phy_qcom_qmp_combo aux_bridge libarc4 coresight_replicator coresight_etm4x coresight_tmc coresight_funnel cfg80211 rfkill coresight qcom_wdt cbmem ramoops reed_solomon pwm_bl coreboot_table backlight crct10dif_ce CPU: 7 UID: 0 PID: 70 Comm: kworker/u32:4 Not tainted 6.11.0-rc6-next-20240903-00003-gdfc6015d0711 #660 Hardware name: Google Lazor Limozeen without Touchscreen (rev5 - rev8) (DT) Workqueue: events_unbound deferred_probe_work_func hub 2-1:1.0: 4 ports detected pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dma_common_alloc_pages+0x54/0x1b4 lr : dma_common_alloc_pages+0x4c/0x1b4 sp : ffff8000807d3730 x29: ffff8000807d3730 x28: ffff02a7d312f880 x27: 0000000000000001 x26: 000000000000c000 x25: 0000000000000000 x24: 0000000000000001 x23: ffff02a7d23b6898 x22: 0000000000006cc0 x21: 000000000000c000 x20: ffff02a7858bf410 x19: fffffe0a60006000 x18: 0000000000000001 x17: 00000000000000d5 x16: 1fffe054f0bcc261 x15: 0000000000000001 x14: ffff02a7844dc680 x13: 0000000000100180 x12: dead000000000100 x11: dead000000000122 x10: 00000000001001ff x9 : ffff02a87f7b7b00 x8 : ffff02a87f7b7b00 x7 : ffff405977d6b000 x6 : ffff8000807d3310 x5 : ffff02a87f6b6398 x4 : 0000000000000001 x3 : ffff405977d6b000 x2 : ffff02a7844dc600 x1 : 0000000100000000 x0 : fffffe0a60006000 Call trace: dma_common_alloc_pages+0x54/0x1b4 __dma_alloc_pages+0x68/0x90 dma_alloc_pages+0x10/0x1c snd_dma_noncoherent_alloc+0x28/0x8c __snd_dma_alloc_pages+0x30/0x50 snd_dma_alloc_dir_pages+0x40/0x80 do_alloc_pages+0xb8/0x13c preallocate_pcm_pages+0x6c/0xf8 preallocate_pages+0x160/0x1a4 snd_pcm_set_managed_buffer_all+0x64/0xb0 lpass_platform_pcm_new+0xc0/0xe8 snd_soc_pcm_component_new+0x3c/0xc8 soc_new_pcm+0x4fc/0x668 snd_soc_bind_card+0xabc/0xbac snd_soc_register_card+0xf0/0x108 devm_snd_soc_register_card+0x4c/0xa4 sc7180_snd_platform_probe+0x180/0x224 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x15c __device_attach_driver+0xb8/0x134 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x88/0xc0 process_one_work+0x14c/0x28c worker_thread+0x2cc/0x3d4 kthread+0x114/0x118 ret_from_fork+0x10/0x20 Code: f9411c19 940000c9 aa0003f3 b4000460 (f9402326) ---[ end trace 0000000000000000 ]--- Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Closes: https://lore.kernel.org/all/10431dfd-ce04-4e0f-973b-c78477303c18@notapiano Reported-by: Nícolas F. R. A. 
Prado #KernelCI Signed-off-by: Leon Romanovsky Tested-by: Nícolas F. R. A. Prado Signed-off-by: Christoph Hellwig --- include/linux/iommu-dma.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index d30a58bf00fd..1bb55ca1ab79 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -10,6 +10,10 @@ #include #ifdef CONFIG_IOMMU_DMA +static inline bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -49,6 +53,10 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); #else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} static inline dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) -- cgit v1.2.3 From 59da880afed211c989ef65da577b24215ce57774 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 3 Sep 2024 10:45:58 -0700 Subject: uprobes: get rid of enum uprobe_filter_ctx in uprobe filter callbacks It serves no purpose beyond adding an unnecessary argument passed to the filter callback. Just get rid of it, no one is actually using it. Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-4-andrii@kernel.org --- include/linux/uprobes.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f50df1fa93e7..63ae2ade3487 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -28,20 +28,12 @@ struct page; #define MAX_URETPROBE_DEPTH 64 -enum uprobe_filter_ctx { - UPROBE_FILTER_REGISTER, - UPROBE_FILTER_UNREGISTER, - UPROBE_FILTER_MMAP, -}; - struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs); - bool (*filter)(struct uprobe_consumer *self, - enum uprobe_filter_ctx ctx, - struct mm_struct *mm); + bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct uprobe_consumer *next; }; -- cgit v1.2.3 From cc01bd044e6a521d2cd128f685ee8d23ef0067f2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 3 Sep 2024 10:45:59 -0700 Subject: uprobes: traverse uprobe's consumer list locklessly under SRCU protection uprobe->register_rwsem is one of a few big bottlenecks to scalability of uprobes, so we need to get rid of it to improve uprobe performance and multi-CPU scalability. First, we turn uprobe's consumer list into a typical doubly-linked list and utilize existing RCU-aware helpers for traversing such lists, as well as adding and removing elements from it. For entry uprobes we already have SRCU protection active since before uprobe lookup. For uretprobes we keep a refcount, guaranteeing that the uprobe won't go away from under us, but we add SRCU protection around consumer list traversal. 
Lastly, to keep handler_chain()'s UPROBE_HANDLER_REMOVE handling simple, we remember whether any removal was requested during handler calls, but then we double-check the decision under a proper register_rwsem using consumers' filter callbacks. Handler removal is very rare, so this extra lock won't hurt performance, overall, but we also avoid the need for any extra protection (e.g., seqcount locks). Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-5-andrii@kernel.org --- include/linux/uprobes.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 63ae2ade3487..f112b56e9479 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -29,13 +29,21 @@ struct page; #define MAX_URETPROBE_DEPTH 64 struct uprobe_consumer { + /* + * handler() can return UPROBE_HANDLER_REMOVE to signal the need to + * unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is + * returned, filter() callback has to be implemented as well and it + * should return false to "confirm" the decision to uninstall uprobe + * for the current process. If filter() is omitted or returns true, + * UPROBE_HANDLER_REMOVE is effectively ignored. + */ int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); - struct uprobe_consumer *next; + struct list_head cons_node; }; #ifdef CONFIG_UPROBES -- cgit v1.2.3 From 04b01625da130c7521b768996cd5e48052198b97 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Sep 2024 10:46:00 -0700 Subject: perf/uprobe: split uprobe_unregister() With uprobe_unregister() having grown a synchronize_srcu(), it becomes fairly slow to call. Esp. since both users of this API call it in a loop. Peel off the sync_srcu() and do it once, after the loop. We also need to add uprobe_unregister_sync() into uprobe_register()'s error handling path, as we need to be careful about returning to the caller before we have a guarantee that partially attached consumer won't be called anymore. This is an unlikely slow path and this should be totally fine to be slow in the case of a failed attach. 
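The resulting unregister pattern for callers is roughly the following (an editorial sketch, not from the patch; the uprobes/consumers arrays and count are illustrative):

    /* cheap per-consumer teardown first ... */
    for (i = 0; i < cnt; i++)
        uprobe_unregister_nosync(uprobes[i], &consumers[i]);

    /* ... then pay the synchronize_srcu() cost only once for the whole batch */
    uprobe_unregister_sync();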
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: "Peter Zijlstra (Intel)" Co-developed-by: Andrii Nakryiko Signed-off-by: Andrii Nakryiko Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903174603.3554182-6-andrii@kernel.org --- include/linux/uprobes.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f112b56e9479..2b294bf1881f 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -115,7 +115,8 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc); +extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); +extern void uprobe_unregister_sync(void); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -164,7 +165,10 @@ uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) return -ENOSYS; } static inline void -uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) +uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ +} +static inline void uprobe_unregister_sync(void) { } static inline int uprobe_mmap(struct vm_area_struct *vma) -- cgit v1.2.3 From 50a38035ed5ccc2ab8a28eaf70c3c7a87e060345 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Sep 2024 10:46:01 -0700 Subject: rbtree: provide rb_find_rcu() / rb_find_add_rcu() Much like latch_tree, add two RCU methods for the regular RB-tree, which can be used in conjunction with a seqcount to provide lockless lookups. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Reviewed-by: "Masami Hiramatsu (Google)" Link: https://lore.kernel.org/r/20240903174603.3554182-7-andrii@kernel.org --- include/linux/rbtree.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index f7edca369eda..7c173aa64e1e 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -244,6 +244,42 @@ rb_find_add(struct rb_node *node, struct rb_root *tree, return NULL; } +/** + * rb_find_add_rcu() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Adds a Store-Release for link_node. + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. 
+ */ +static __always_inline struct rb_node * +rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + /** * rb_find() - find @key in tree @tree * @key: key to match @@ -272,6 +308,37 @@ rb_find(const void *key, const struct rb_root *tree, return NULL; } +/** + * rb_find_rcu() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Notably, tree descent vs concurrent tree rotations is unsound and can result + * in false-negatives. + * + * Returns the rb_node matching @key or NULL. + */ +static __always_inline struct rb_node * +rb_find_rcu(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return node; + } + + return NULL; +} + /** * rb_find_first() - find the first @key in @tree * @key: key to match -- cgit v1.2.3 From e04e2b760ddbe3d7b283a05898c3a029085cd8cd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:52 +0200 Subject: platform/x86: wmi: Pass event data directly to legacy notify handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current legacy WMI handlers are susceptible to picking up wrong WMI event data on systems where different WMI devices share some notification IDs. Prevent this by letting the WMI driver core taking care of retrieving the event data. This also simplifies the legacy WMI handlers and their implementation inside the WMI driver core. Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20240901031055.3030-3-W_Armin@gmx.de Signed-off-by: Hans de Goede --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..eed105b1fbfb 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -386,7 +386,7 @@ extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -typedef void (*wmi_notify_handler) (u32 value, void *context); +typedef void (*wmi_notify_handler) (union acpi_object *data, void *context); int wmi_instance_count(const char *guid); -- cgit v1.2.3 From 79a56f4c8fa629ac79ada8f4b746195bd91f09c7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:53 +0200 Subject: platform/x86: wmi: Remove wmi_get_event_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the WMI driver core now takes care of retrieving the WMI event data even for legacy WMI notify handlers, this function is no longer used. Remove it to prevent WMI drivers from messing up the ACPI firmware on some machines. 
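Since the core now hands the parsed event data to the handler, a legacy handler has no reason left to call wmi_get_event_data(); its new shape is roughly as follows (an editorial sketch; the integer check and printout are illustrative, a real driver would match its own event format):

    static void example_wmi_notify(union acpi_object *data, void *context)
    {
        /* the WMI core retrieves and passes the event data directly */
        if (data && data->type == ACPI_TYPE_INTEGER)
            pr_info("WMI event: %llu\n",
                    (unsigned long long)data->integer.value);
    }

    /* registered as before: wmi_install_notify_handler(guid, example_wmi_notify, NULL); */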
Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20240901031055.3030-4-W_Armin@gmx.de Signed-off-by: Hans de Goede --- include/linux/acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index eed105b1fbfb..3cbe4b57bc73 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -401,7 +401,6 @@ extern acpi_status wmi_set_block(const char *guid, u8 instance, extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); -extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); -- cgit v1.2.3 From a4311c274e08001b129bd5ffd2a41dcbd3e0a855 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 29 Aug 2024 20:31:50 +0200 Subject: kunit: Fix kernel-doc for EXPORT_SYMBOL_IF_KUNIT While kunit/visibility.h is today not included in any generated kernel documentation, also likely due to the fact that none of the existing comments are correctly recognized as kernel-doc, but once we decide to add this header and fix the tool, there will be: ../include/kunit/visibility.h:61: warning: Function parameter or struct member 'symbol' not described in 'EXPORT_SYMBOL_IF_KUNIT' Signed-off-by: Michal Wajdeczko Reviewed-by: Rae Moar Acked-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/visibility.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/visibility.h b/include/kunit/visibility.h index 0dfe35feeec6..efff77b58dd6 100644 --- a/include/kunit/visibility.h +++ b/include/kunit/visibility.h @@ -22,6 +22,7 @@ * EXPORTED_FOR_KUNIT_TESTING namespace only if CONFIG_KUNIT is * enabled. Must use MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING) * in test file in order to use symbols. + * @symbol: the symbol identifier to export */ #define EXPORT_SYMBOL_IF_KUNIT(symbol) EXPORT_SYMBOL_NS(symbol, \ EXPORTED_FOR_KUNIT_TESTING) -- cgit v1.2.3 From 3e6a7e3cda773adacc2fa4b9623d0a8c0f904d50 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 27 Aug 2024 09:59:38 -0700 Subject: iommufd: Reorder struct forward declarations Reorder struct forward declarations to alphabetic order to simplify maintenance, as upcoming patches will add more to the list. No functional change intended. 
Link: https://patch.msgid.link/r/c5dd87100f6f01389b838c63237e28c5dd373358.1724776335.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index c2f2f6b9148e..30f832a60ccb 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,12 +11,12 @@ #include struct device; -struct iommufd_device; -struct page; -struct iommufd_ctx; -struct iommufd_access; struct file; struct iommu_group; +struct iommufd_access; +struct iommufd_ctx; +struct iommufd_device; +struct page; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); -- cgit v1.2.3 From 6ff4cd1160afafc12ad1603e3d2f39256e4b708d Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:15 +0800 Subject: lib/string_choices: Add str_true_false()/str_false_true() helper Add str_true_false()/str_false_true() helper to return "true" or "false" string literal. Signed-off-by: Hongbo Li Link: https://lore.kernel.org/r/20240827024517.914100-2-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 1320bcdcb89c..ebcc56b28ede 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -48,6 +48,12 @@ static inline const char *str_up_down(bool v) } #define str_down_up(v) str_up_down(!(v)) +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization -- cgit v1.2.3 From c2708ba91c3c1fba424b77de83b6fc45cbf38c46 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 5 Sep 2024 17:25:39 +0800 Subject: lib/string_choices: Introduce several opposite string choice helpers Similar to the existing str_enable_disable/str_enabled_disabled/str_on_off/str_yes_no helpers, we can add the opposite helpers. That's str_disable_enable, str_disabled_enabled, str_off_on and str_no_yes. There are currently more than 10 places in the code (except for str_disable_enable, which now has 3 use cases) that can be replaced with these helpers. Signed-off-by: Hongbo Li Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240905092540.2962122-2-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index ebcc56b28ede..fd067992260a 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -8,11 +8,13 @@ static inline const char *str_enable_disable(bool v) { return v ? "enable" : "disable"; } +#define str_disable_enable(v) str_enable_disable(!(v)) static inline const char *str_enabled_disabled(bool v) { return v ? "enabled" : "disabled"; } +#define str_disabled_enabled(v) str_enabled_disabled(!(v)) static inline const char *str_hi_lo(bool v) { @@ -36,11 +38,13 @@ static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } +#define str_off_on(v) str_on_off(!(v)) static inline const char *str_yes_no(bool v) { return v ? 
"yes" : "no"; } +#define str_no_yes(v) str_yes_no(!(v)) static inline const char *str_up_down(bool v) { -- cgit v1.2.3 From c121d5cc3a993cdbfab46a152bdd50227a4d5e8c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 5 Sep 2024 17:25:40 +0800 Subject: lib/string_choices: Add some comments to make more clear for string choices helpers. Add some comments to explain why we should use string_choices helpers. Signed-off-by: Hongbo Li Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240905092540.2962122-3-lihongbo22@huawei.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index fd067992260a..120ca0f28e95 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -2,6 +2,19 @@ #ifndef _LINUX_STRING_CHOICES_H_ #define _LINUX_STRING_CHOICES_H_ +/* + * Here provide a series of helpers in the str_$TRUE_$FALSE format (you can + * also expand some helpers as needed), where $TRUE and $FALSE are their + * corresponding literal strings. These helpers can be used in the printing + * and also in other places where constant strings are required. Using these + * helpers offers the following benefits: + * 1) Reducing the hardcoding of strings, which makes the code more elegant + * through these simple literal-meaning helpers. + * 2) Unifying the output, which prevents the same string from being printed + * in various forms, such as enable/disable, enabled/disabled, en/dis. + * 3) Deduping by the linker, which results in a smaller binary file. + */ + #include static inline const char *str_enable_disable(bool v) -- cgit v1.2.3 From 6fe0593bfc3cfab8b4ec7255152a2be40b2e49a3 Mon Sep 17 00:00:00 2001 From: Erling Ljunggren Date: Wed, 28 Sep 2022 13:21:43 +0200 Subject: media: videodev2.h: add V4L2_CAP_EDID Add capability flag to indicate that the device is an EDID-only device. Signed-off-by: Erling Ljunggren Signed-off-by: Hans Verkuil Reviewed-by: Sebastian Fricke Reviewed-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 725e86c4bbbd..27239cb64065 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -502,6 +502,7 @@ struct v4l2_capability { #define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ +#define V4L2_CAP_EDID 0x02000000 /* Is an EDID-only device */ #define V4L2_CAP_STREAMING 0x04000000 /* streaming I/O ioctls */ #define V4L2_CAP_META_OUTPUT 0x08000000 /* Is a metadata output device */ -- cgit v1.2.3 From c7a29258737076a7caf9ced6a7d710cce890abe5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 12 Aug 2020 14:20:10 +0200 Subject: media: input: serio.h: add SERIO_EXTRON_DA_HD_PLUS Add a new serio ID for the Extron DA HD 4K Plus series of 4K HDMI Distribution Amplifiers. These devices support CEC over the serial port, so a new serio ID is needed to be able to associate the CEC driver. 
Signed-off-by: Hans Verkuil Acked-by: Dmitry Torokhov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index ed2a96f43ce4..5a2af0942c9f 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -83,5 +83,6 @@ #define SERIO_PULSE8_CEC 0x40 #define SERIO_RAINSHADOW_CEC 0x41 #define SERIO_FSIA6B 0x42 +#define SERIO_EXTRON_DA_HD_4K_PLUS 0x43 #endif /* _UAPI_SERIO_H */ -- cgit v1.2.3 From 6bb8ef90c444c2fd97208bfce42137c4add32bbb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 2 Aug 2024 12:19:48 +0200 Subject: media: cec: move cec_get/put_device to header Move cec_get/put_device to the media/cec.h header. This allows CEC drivers to use this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 07d2ee8a3904..16b412b3131b 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -298,6 +298,37 @@ struct cec_adapter { char input_phys[40]; }; +static inline int cec_get_device(struct cec_adapter *adap) +{ + struct cec_devnode *devnode = &adap->devnode; + + /* + * Check if the cec device is available. This needs to be done with + * the devnode->lock held to prevent an open/unregister race: + * without the lock, the device could be unregistered and freed between + * the devnode->registered check and get_device() calls, leading to + * a crash. + */ + mutex_lock(&devnode->lock); + /* + * return ENODEV if the cec device has been removed + * already or if it is not registered anymore. + */ + if (!devnode->registered) { + mutex_unlock(&devnode->lock); + return -ENODEV; + } + /* and increase the device refcount */ + get_device(&devnode->dev); + mutex_unlock(&devnode->lock); + return 0; +} + +static inline void cec_put_device(struct cec_adapter *adap) +{ + put_device(&adap->devnode.dev); +} + static inline void *cec_get_drvdata(const struct cec_adapter *adap) { return adap->priv; -- cgit v1.2.3 From 1ae497c78f01855f3695b58481311ffdd429b028 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Thu, 5 Sep 2024 13:52:32 +0800 Subject: bpf: use type_may_be_null() helper for nullable-param check Commit 980ca8ceeae6 ("bpf: check bpf_dummy_struct_ops program params for test runs") does bitwise AND between reg_type and PTR_MAYBE_NULL, which is correct, but due to type difference the compiler complains: net/bpf/bpf_dummy_struct_ops.c:118:31: warning: bitwise operation between different enumeration types ('const enum bpf_reg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 118 | if (info && (info->reg_type & PTR_MAYBE_NULL)) | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Workaround the warning by moving the type_may_be_null() helper from verifier.c into bpf_verifier.h, and reuse it here to check whether param is nullable. 
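The call-site change this enables is essentially the following (an editorial sketch of the intended usage, mirroring the warning line quoted above):

    /* before: if (info && (info->reg_type & PTR_MAYBE_NULL)) */
    if (info && type_may_be_null(info->reg_type))
        return true;    /* the parameter may be NULL at runtime */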
Fixes: 980ca8ceeae6 ("bpf: check bpf_dummy_struct_ops program params for test runs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404241956.HEiRYwWq-lkp@intel.com/ Signed-off-by: Shung-Hsi Yu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240905055233.70203-1-shung-hsi.yu@suse.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8458632824a4..4513372c5bc8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -927,6 +927,11 @@ static inline bool type_is_sk_pointer(enum bpf_reg_type type) type == PTR_TO_XDP_SOCK; } +static inline bool type_may_be_null(u32 type) +{ + return type & PTR_MAYBE_NULL; +} + static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; -- cgit v1.2.3 From 12cb32a52eb607dc4d0e45fe6f4cf946d08da0fd Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 5 Sep 2024 10:47:55 +0800 Subject: kunit: Fix missing kerneldoc comment Add a missing kerneldoc comment for the 'test' test context parameter, fixing the following warning: include/kunit/test.h:492: warning: Function parameter or struct member 'test' not described in 'kunit_kfree_const' Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20240827160631.67e121ed@canb.auug.org.au/ Fixes: f2c6dbd22017 ("kunit: Device wrappers should also manage driver name") Signed-off-by: David Gow Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- include/kunit/test.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 5ac237c949a0..34b71e42fb10 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -484,6 +484,7 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp /** * kunit_kfree_const() - conditionally free test managed memory + * @test: The test context object. * @x: pointer to the memory * * Calls kunit_kfree() only if @x is not in .rodata section. -- cgit v1.2.3 From 706ae6446494b4523d33b0d96ea1fd9d50ca9be6 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Wed, 4 Sep 2024 14:41:07 +0300 Subject: clk: fixed-rate: add devm_clk_hw_register_fixed_rate_parent_data() Add devm_clk_hw_register_fixed_rate_parent_data(), devres-managed helper to register fixed-rate clock with parent_data. 
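A possible use from a driver probe path (an editorial sketch; the clock name, parent fw_name and rate are made up):

    static const struct clk_parent_data example_parent = { .fw_name = "xo" };

    hw = devm_clk_hw_register_fixed_rate_parent_data(dev, "pll_ref",
                                                     &example_parent, 0,
                                                     24000000);
    if (IS_ERR(hw))
        return PTR_ERR(hw);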
Signed-off-by: Nikita Shubin Link: https://lore.kernel.org/r/20240904-devm_clk_hw_register_fixed_rate_parent_data-v1-1-7f14d6b456e5@maquefel.me Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 4a537260f655..7e43caabb54b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -393,6 +393,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, #define devm_clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ NULL, (flags), (fixed_rate), 0, 0, true) +/** + * devm_clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with + * the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_data: parent clk data + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define devm_clk_hw_register_fixed_rate_parent_data(dev, name, parent_data, flags, \ + fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ + (parent_data), (flags), (fixed_rate), 0, \ + 0, true) /** * clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with * the clock framework -- cgit v1.2.3 From 9934a1bd45b2b03f6d1204a6ae2780d3b009799f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Aug 2024 10:57:31 +0200 Subject: clk: provide devm_clk_get_optional_enabled_with_rate() There are clock users in the kernel that can't use devm_clk_get_optional_enabled() as they need to set rate after getting the clock and before enabling it. Provide a managed helper that wraps these operations in the correct order. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240805-clk-new-helper-v2-1-e5fdd1e1d729@linaro.org Signed-off-by: Stephen Boyd --- include/linux/clk.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 0fa56d672532..851a0f2cf42c 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -640,6 +640,32 @@ struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); */ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); +/** + * devm_clk_get_optional_enabled_with_rate - devm_clk_get_optional() + + * clk_set_rate() + + * clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * @rate: new clock rate + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_enabled(). + * + * The returned clk (if valid) is prepared and enabled and rate was set. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_enabled_with_rate(struct device *dev, + const char *id, + unsigned long rate); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. 
@@ -982,6 +1008,13 @@ static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, return NULL; } +static inline struct clk * +devm_clk_get_optional_enabled_with_rate(struct device *dev, const char *id, + unsigned long rate) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { -- cgit v1.2.3 From 08062af0a52107a243f7608fd972edb54ca5b7f8 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 4 Sep 2024 15:34:30 +0000 Subject: net: napi: Prevent overflow of napi_defer_hard_irqs In commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature") napi_defer_hard_irqs was added to net_device and defer_hard_irqs_count was added to napi_struct, both as type int. This value never goes below zero, so there is no reason for it to be a signed int. Change the type for both from int to u32, and add an overflow check to sysfs to limit the value to S32_MAX. The limit of S32_MAX was chosen because the practical limit before this patch was S32_MAX (anything larger was an overflow) and thus there are no behavioral changes introduced. If the extra bit is needed in the future, the limit can be raised. Before this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' $ cat /sys/class/net/eth4/napi_defer_hard_irqs -2147483647 After this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' bash: line 0: echo: write error: Numerical result out of range Similarly, /sys/class/net/XXXXX/tx_queue_len is defined as unsigned: include/linux/netdevice.h: unsigned int tx_queue_len; And has an overflow check: dev_change_tx_queue_len(..., unsigned long new_len): if (new_len != (unsigned int)new_len) return -ERANGE; Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240904153431.307932-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5f0dda733b..b47c00657bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -356,7 +356,7 @@ struct napi_struct { unsigned long state; int weight; - int defer_hard_irqs_count; + u32 defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -2075,7 +2075,7 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; + u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; -- cgit v1.2.3 From de35996d4b364749194c5b473d1912578880833e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 18:47:08 -0700 Subject: Input: matrix_keypad - remove support for platform data There are no more users of struct matrix_keypad_platform_data in the kernel, remove support for it from the driver. 
Acked-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240805014710.1961677-6-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/input/matrix_keypad.h | 48 ------------------------------------- 1 file changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index b8d8d69eba29..90867f44ab4d 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -34,52 +34,6 @@ struct matrix_keymap_data { unsigned int keymap_size; }; -/** - * struct matrix_keypad_platform_data - platform-dependent keypad data - * @keymap_data: pointer to &matrix_keymap_data - * @row_gpios: pointer to array of gpio numbers representing rows - * @col_gpios: pointer to array of gpio numbers reporesenting colums - * @num_row_gpios: actual number of row gpios used by device - * @num_col_gpios: actual number of col gpios used by device - * @col_scan_delay_us: delay, measured in microseconds, that is - * needed before we can keypad after activating column gpio - * @debounce_ms: debounce interval in milliseconds - * @clustered_irq: may be specified if interrupts of all row/column GPIOs - * are bundled to one single irq - * @clustered_irq_flags: flags that are needed for the clustered irq - * @active_low: gpio polarity - * @wakeup: controls whether the device should be set up as wakeup - * source - * @no_autorepeat: disable key autorepeat - * @drive_inactive_cols: drive inactive columns during scan, rather than - * making them inputs. - * - * This structure represents platform-specific data that use used by - * matrix_keypad driver to perform proper initialization. - */ -struct matrix_keypad_platform_data { - const struct matrix_keymap_data *keymap_data; - - const unsigned int *row_gpios; - const unsigned int *col_gpios; - - unsigned int num_row_gpios; - unsigned int num_col_gpios; - - unsigned int col_scan_delay_us; - - /* key debounce interval in milli-second */ - unsigned int debounce_ms; - - unsigned int clustered_irq; - unsigned int clustered_irq_flags; - - bool active_low; - bool wakeup; - bool no_autorepeat; - bool drive_inactive_cols; -}; - int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, const char *keymap_name, unsigned int rows, unsigned int cols, @@ -88,6 +42,4 @@ int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, int matrix_keypad_parse_properties(struct device *dev, unsigned int *rows, unsigned int *cols); -#define matrix_keypad_parse_of_params matrix_keypad_parse_properties - #endif /* _MATRIX_KEYPAD_H */ -- cgit v1.2.3 From f820b43754cdbd078a0d17122b281f42cbcd2155 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 23 Aug 2024 22:50:27 -0700 Subject: Input: zforce_ts - remove support for platfrom data There are no in-tree users of platform data and any new ones should either use device tree or static device properties, so let's remove platform data support. 
Tested-by: Andreas Kemnade # Tolino Shine2HD Link: https://lore.kernel.org/r/20240824055047.1706392-4-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/zforce_ts.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/zforce_ts.h (limited to 'include') diff --git a/include/linux/platform_data/zforce_ts.h b/include/linux/platform_data/zforce_ts.h deleted file mode 100644 index 2463a4a856a6..000000000000 --- a/include/linux/platform_data/zforce_ts.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* drivers/input/touchscreen/zforce.c - * - * Copyright (C) 2012-2013 MundoReader S.L. - */ - -#ifndef _LINUX_INPUT_ZFORCE_TS_H -#define _LINUX_INPUT_ZFORCE_TS_H - -struct zforce_ts_platdata { - unsigned int x_max; - unsigned int y_max; -}; - -#endif /* _LINUX_INPUT_ZFORCE_TS_H */ -- cgit v1.2.3 From 090624b7dc8389a516df3adb447a25dc0723adfe Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 5 Sep 2024 16:12:52 +0200 Subject: ALSA: pcm: add more sample rate definitions This adds a sample rate definition for 12kHz, 24kHz and 128kHz. Admittedly, just a few drivers are currently using these sample rates, but there is enough of a recurrence to justify adding definitions for them and removing some custom rate constraint code while at it. The new definitions are not added to the interval definitions, such as SNDRV_PCM_RATE_8000_44100, because it would silently add new supported rates to drivers that may or may not support them. The drivers have certainly not been tested for these new rates, so it is better to leave them out of the interval definitions. That being said, the added rates are multiples of well-known rate families, so it is very likely that a lot of devices out there actually support them. 
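A driver that does support the new rates can then advertise them directly in its PCM/DAI hardware description instead of open-coding a rate constraint, e.g. (an editorial sketch; the particular rate mix is illustrative):

    .rates = SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_12000 |
             SNDRV_PCM_RATE_24000 | SNDRV_PCM_RATE_128000,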
Signed-off-by: Jerome Brunet Reviewed-by: David Rhodes Acked-by: Mark Brown Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240905-alsa-12-24-128-v1-1-8371948d3921@baylibre.com --- include/sound/pcm.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 732121b934fd..c993350975a9 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -109,20 +109,23 @@ struct snd_pcm_ops { #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ -#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ -#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ -#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ -#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ -#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ -#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ -#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ -#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ -#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ -#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ -#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ -#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ -#define SNDRV_PCM_RATE_705600 (1U<<15) /* 705600Hz */ -#define SNDRV_PCM_RATE_768000 (1U<<16) /* 768000Hz */ +#define SNDRV_PCM_RATE_12000 (1U<<3) /* 12000Hz */ +#define SNDRV_PCM_RATE_16000 (1U<<4) /* 16000Hz */ +#define SNDRV_PCM_RATE_22050 (1U<<5) /* 22050Hz */ +#define SNDRV_PCM_RATE_24000 (1U<<6) /* 24000Hz */ +#define SNDRV_PCM_RATE_32000 (1U<<7) /* 32000Hz */ +#define SNDRV_PCM_RATE_44100 (1U<<8) /* 44100Hz */ +#define SNDRV_PCM_RATE_48000 (1U<<9) /* 48000Hz */ +#define SNDRV_PCM_RATE_64000 (1U<<10) /* 64000Hz */ +#define SNDRV_PCM_RATE_88200 (1U<<11) /* 88200Hz */ +#define SNDRV_PCM_RATE_96000 (1U<<12) /* 96000Hz */ +#define SNDRV_PCM_RATE_128000 (1U<<13) /* 128000Hz */ +#define SNDRV_PCM_RATE_176400 (1U<<14) /* 176400Hz */ +#define SNDRV_PCM_RATE_192000 (1U<<15) /* 192000Hz */ +#define SNDRV_PCM_RATE_352800 (1U<<16) /* 352800Hz */ +#define SNDRV_PCM_RATE_384000 (1U<<17) /* 384000Hz */ +#define SNDRV_PCM_RATE_705600 (1U<<18) /* 705600Hz */ +#define SNDRV_PCM_RATE_768000 (1U<<19) /* 768000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ -- cgit v1.2.3 From 23de126f9248a2f9218175ea0dd1c1403e334440 Mon Sep 17 00:00:00 2001 From: Pieter Van Trappen Date: Wed, 4 Sep 2024 08:27:42 +0200 Subject: net: dsa: microchip: replace unclear KSZ8830 strings Replace ksz8830 with ksz88x3 for CHIP_ID definition and other strings. This due to KSZ8830 not being an actual switch but the Chip ID shared among KSZ8863/8873 switches, impossible to differentiate from their Chip ID or Revision ID registers. Now all KSZ*_CHIP_ID macros refer to actual, existing switches which removes confusion. Signed-off-by: Pieter Van Trappen Acked-by: Arun Ramadoss Signed-off-by: David S. 
Miller --- include/linux/platform_data/microchip-ksz.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index d074019474f5..2ee1a679e592 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -27,7 +27,7 @@ enum ksz_chip_id { KSZ8795_CHIP_ID = 0x8795, KSZ8794_CHIP_ID = 0x8794, KSZ8765_CHIP_ID = 0x8765, - KSZ8830_CHIP_ID = 0x8830, + KSZ88X3_CHIP_ID = 0x8830, KSZ8864_CHIP_ID = 0x8864, KSZ8895_CHIP_ID = 0x8895, KSZ9477_CHIP_ID = 0x00947700, -- cgit v1.2.3 From 62c16f219a73c237b5bef9bd160e32fbb38794f9 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:22 +0530 Subject: wifi: cfg80211: move DFS related members to links[] in wireless_dev A few members related to DFS handling are currently under per wireless device data structure. However, in order to support DFS with MLO, there is a need to have them on a per-link manner. Hence, as a preliminary step, move members cac_started, cac_start_time and cac_time_ms to be on a per-link basis. Since currently, link ID is not known at all places, use default value of 0 for now. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-5-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 192d72c8b465..727a94a6b7c3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6194,9 +6194,6 @@ enum ieee80211_ap_reg_power { * @address: The address for this device, valid only if @netdev is %NULL * @is_running: true if this is a non-netdev device that has been started, e.g. * the P2P Device. - * @cac_started: true if DFS channel availability check has been started - * @cac_start_time: timestamp (jiffies) when the dfs state was entered. - * @cac_time_ms: CAC time in ms * @ps: powersave mode is enabled * @ps_timeout: dynamic powersave timeout * @ap_unexpected_nlportid: (private) netlink port ID of application @@ -6220,6 +6217,11 @@ enum ieee80211_ap_reg_power { * unprotected beacon report * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr * @ap and @client for each link + * @links[].cac_started: true if DFS channel availability check has been + * started + * @links[].cac_start_time: timestamp (jiffies) when the dfs state was + * entered. + * @links[].cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { @@ -6261,11 +6263,6 @@ struct wireless_dev { u32 owner_nlportid; bool nl_owner_dead; - /* FIXME: need to rework radar detection for MLO */ - bool cac_started; - unsigned long cac_start_time; - unsigned int cac_time_ms; - #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -6332,6 +6329,10 @@ struct wireless_dev { struct cfg80211_internal_bss *current_bss; } client; }; + + bool cac_started; + unsigned long cac_start_time; + unsigned int cac_time_ms; } links[IEEE80211_MLD_MAX_NUM_LINKS]; u16 valid_links; }; -- cgit v1.2.3 From 81f67d60ebf290da068af96ad0c4a4e4faebdf1d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:23 +0530 Subject: wifi: cfg80211: handle DFS per link Currently, during starting a radar detection, no link id information is parsed and passed down. 
In order to support starting radar detection during Multi Link Operation, it is required to pass the link ID as well. Add changes to first parse and then pass the link ID in the start radar detection path. Additionally, update notification APIs to allow drivers/mac80211 to pass the link ID. However, everything is handled at link 0 only until all APIs are ready to handle it per link. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-6-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 727a94a6b7c3..efdea667cb92 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4837,9 +4837,9 @@ struct cfg80211_ops { int (*start_radar_detection)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, - u32 cac_time_ms); + u32 cac_time_ms, int link_id); void (*end_cac)(struct wiphy *wiphy, - struct net_device *dev); + struct net_device *dev, unsigned int link_id); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, @@ -8741,6 +8741,7 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, * @chandef: chandef for the current channel * @event: type of event * @gfp: context flags + * @link_id: valid link_id for MLO operation or 0 otherwise. * * This function is called when a Channel availability check (CAC) is finished * or aborted. This must be called to notify the completion of a CAC process, @@ -8748,7 +8749,8 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, */ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event, gfp_t gfp); + enum nl80211_radar_event event, gfp_t gfp, + unsigned int link_id); /** * cfg80211_background_cac_abort - Channel Availability Check offchan abort event -- cgit v1.2.3 From bca8bc0399ac2efd56e6adbed0307e10125a556c Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Sep 2024 12:14:26 +0530 Subject: wifi: mac80211: handle ieee80211_radar_detected() for MLO Currently, DFS works under the assumption that there can be only one channel context in the hardware. Hence, drivers just call the function ieee80211_radar_detected(), passing the hardware structure. However, with MLO, this obviously will not work since the number of channel contexts will be more than one, and hence drivers would need to also pass the channel information on which the radar is detected. Also, when radar is detected on one of the links, the other links' CAC should not be cancelled. Hence, in order to support DFS with MLO, do the following changes - * Add channel context conf pointer as an argument to the function ieee80211_radar_detected(). During MLO, drivers would have to pass on which channel context conf radar is detected. Otherwise, drivers could just pass NULL. * ieee80211_radar_detected() will iterate over all channel contexts present and * if channel context conf is passed, only mark that as radar detected * if NULL is passed, then mark all channel contexts as radar detected * Then as usual, schedule the radar detected work. 
* In the worker, go over all the contexts again and for all such context which is marked with radar detected, cancel the ongoing CAC by calling ieee80211_dfs_cac_cancel() and then notify cfg80211 via cfg80211_radar_event(). * To cancel the CAC, pass the channel context as well where radar is detected to ieee80211_dfs_cac_cancel(). This ensures that CAC is canceled only on the links using the provided context, leaving other links unaffected. This would also help in scenarios where there is split phy 5 GHz radio, which is capable of DFS channels in both lower and upper band. In this case, simultaneous radars can be detected. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20240906064426.2101315-9-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index adfec877f392..954dff901b69 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6748,8 +6748,11 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp); * ieee80211_radar_detected - inform that a radar was detected * * @hw: pointer as obtained from ieee80211_alloc_hw() + * @chanctx_conf: Channel context on which radar is detected. Mandatory to + * pass a valid pointer during MLO. For non-MLO %NULL can be passed */ -void ieee80211_radar_detected(struct ieee80211_hw *hw); +void ieee80211_radar_detected(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *chanctx_conf); /** * ieee80211_chswitch_done - Complete channel switch process -- cgit v1.2.3 From 56bd72e9cd8272687aa2a8eccddabc5526cd7845 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Mon, 12 Aug 2024 18:41:42 +0000 Subject: clocksource: acpi_pm: Add external callback for suspend/resume Provides the capability to register an external callback for the ACPI PM timer, which is called during the suspend and resume processes. Signed-off-by: Marek Maslanka Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240812184150.1079924-1-mmaslanka@google.com Signed-off-by: Daniel Lezcano --- include/linux/acpi_pmtmr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 50d88bf1498d..0ded9220d379 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -26,6 +26,19 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +/** + * Register callback for suspend and resume event + * + * @cb Callback triggered on suspend and resume + * @data Data passed with the callback + */ +void acpi_pmtmr_register_suspend_resume_callback(void (*cb)(void *data, bool suspend), void *data); + +/** + * Remove registered callback for suspend and resume event + */ +void acpi_pmtmr_unregister_suspend_resume_callback(void); + #else static inline u32 acpi_pm_read_early(void) -- cgit v1.2.3 From 4849b2f78020cf0e3ba67777aadd07c620c91811 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 5 Sep 2024 05:14:29 +0000 Subject: ASoC: makes rtd->initialized bit field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rtd->initialized is used to know whether soc_init_pcm_runtime() was correctly fined, and used to call snd_soc_link_exit(). We don't need to have it as bool, let's make it bit-field same as other flags. 
Signed-off-by: Kuninori Morimoto Cc: Amadeusz Sławiński Cc: Cezary Rojewski Link: https://patch.msgid.link/87o752k7gq.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 81ef380b2e06..e6e359c1a2ac 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1207,8 +1207,7 @@ struct snd_soc_pcm_runtime { /* bit field */ unsigned int pop_wait:1; unsigned int fe_compr:1; /* for Dynamic PCM */ - - bool initialized; + unsigned int initialized:1; /* CPU/Codec/Platform */ int num_components; -- cgit v1.2.3 From e49dacc71ec2621ce4c422cd5605d4d06f7807b0 Mon Sep 17 00:00:00 2001 From: Wouter Verhelst Date: Mon, 12 Aug 2024 15:20:37 +0200 Subject: nbd: implement the WRITE_ZEROES command The NBD protocol defines a message for zeroing out a region of an export Add support to the kernel driver for that message. Signed-off-by: Wouter Verhelst Cc: Eric Blake Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240812133032.115134-3-w@uter.be Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index d75215f2c675..f1d468acfb25 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -42,8 +42,9 @@ enum { NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, - NBD_CMD_TRIM = 4 + NBD_CMD_TRIM = 4, /* userspace defines additional extension commands */ + NBD_CMD_WRITE_ZEROES = 6, }; /* values for flags field, these are server interaction specific. */ @@ -53,11 +54,13 @@ enum { #define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* device is rotational */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* supports WRITE_ZEROES */ /* there is a gap here to match userspace */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ /* values for cmd flags in the upper 16 bits of request type */ #define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 17) /* Do not punch a hole for WRITE_ZEROES */ /* These are client behavior specific flags. */ #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on -- cgit v1.2.3 From 3f4c0ad490cc55db5b375a2e19c67159cb255795 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:04 +0200 Subject: mtd: nand: Rename the NAND IO iteration helper Soon a helper for iterating over blocks will be needed (for continuous read purposes). In order to clarify the intend of this helper, let's rename it with the "page" wording inside. While at it, improve the doc and fix a typo. 
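As a usage sketch (not part of this patch), callers such as the SPI-NAND core keep iterating with the nanddev_io_for_each_page() helper exactly as before; only the init helper underneath gets the "page" name. do_one_page() below is a hypothetical per-page handler used purely for illustration:

static int example_mtd_read(struct nand_device *nand, loff_t from,
			    struct mtd_oob_ops *req)
{
	struct nand_io_iter iter;
	int ret;

	/* Walks the MTD request page by page via the renamed init helper */
	nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, req, &iter) {
		ret = do_one_page(nand, &iter.req);	/* hypothetical handler */
		if (ret)
			return ret;
	}

	return 0;
}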
Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index b2996dc987ff..92e06ac33815 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -906,19 +906,19 @@ static inline void nanddev_pos_next_page(struct nand_device *nand, } /** - * nand_io_iter_init - Initialize a NAND I/O iterator + * nand_io_page_iter_init - Initialize a NAND I/O iterator * @nand: NAND device * @offs: absolute offset * @req: MTD request * @iter: NAND I/O iterator * * Initializes a NAND iterator based on the information passed by the MTD - * layer. + * layer for page jumps. */ -static inline void nanddev_io_iter_init(struct nand_device *nand, - enum nand_page_io_req_type reqtype, - loff_t offs, struct mtd_oob_ops *req, - struct nand_io_iter *iter) +static inline void nanddev_io_page_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) { struct mtd_info *mtd = nanddev_to_mtd(nand); @@ -990,10 +990,10 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, * @req: MTD I/O request * @iter: NAND I/O iterator * - * Should be used for iterate over pages that are contained in an MTD request. + * Should be used for iterating over pages that are contained in an MTD request. */ #define nanddev_io_for_each_page(nand, type, start, req, iter) \ - for (nanddev_io_iter_init(nand, type, start, req, iter); \ + for (nanddev_io_page_iter_init(nand, type, start, req, iter); \ !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) -- cgit v1.2.3 From 8adf1ac24ba8ee34b8fd36ebf159004ec472fca0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:05 +0200 Subject: mtd: nand: Introduce a block iterator In order to be able to iterate easily across eraseblocks rather than pages, let's introduce a block iterator inspired from the page iterator. The main usage of this iterator will be for continuous/sequential reads, where it is interesting to use a single request rather than split the requests in smaller chunks (ie. pages) that can be hardly optimized. So a "continuous" boolean get's added for this purpose. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-3-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 92e06ac33815..1e4208040956 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -103,6 +103,8 @@ enum nand_page_io_req_type { * @ooblen: the number of OOB bytes to read from/write to this page * @oobbuf: buffer to store OOB data in or get OOB data from * @mode: one of the %MTD_OPS_XXX mode + * @continuous: no need to start over the operation at the end of each page, the + * NAND device will automatically prepare the next one * * This object is used to pass per-page I/O requests to NAND sub-layers. 
This * way all useful information are already formatted in a useful way and @@ -125,6 +127,7 @@ struct nand_page_io_req { void *in; } oobbuf; int mode; + bool continuous; }; const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void); @@ -937,6 +940,43 @@ static inline void nanddev_io_page_iter_init(struct nand_device *nand, iter->req.ooblen = min_t(unsigned int, iter->oobbytes_per_page - iter->req.ooboffs, iter->oobleft); + iter->req.continuous = false; +} + +/** + * nand_io_block_iter_init - Initialize a NAND I/O iterator + * @nand: NAND device + * @offs: absolute offset + * @req: MTD request + * @iter: NAND I/O iterator + * + * Initializes a NAND iterator based on the information passed by the MTD + * layer for block jumps (no OOB) + * + * In practice only reads may leverage this iterator. + */ +static inline void nanddev_io_block_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) +{ + unsigned int offs_in_eb; + + iter->req.type = reqtype; + iter->req.mode = req->mode; + iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); + iter->req.ooboffs = 0; + iter->oobbytes_per_page = 0; + iter->dataleft = req->len; + iter->oobleft = 0; + iter->req.databuf.in = req->datbuf; + offs_in_eb = (nand->memorg.pagesize * iter->req.pos.page) + iter->req.dataoffs; + iter->req.datalen = min_t(unsigned int, + nanddev_eraseblock_size(nand) - offs_in_eb, + iter->dataleft); + iter->req.oobbuf.in = NULL; + iter->req.ooblen = 0; + iter->req.continuous = true; } /** @@ -962,6 +1002,25 @@ static inline void nanddev_io_iter_next_page(struct nand_device *nand, iter->oobleft); } +/** + * nand_io_iter_next_block - Move to the next block + * @nand: NAND device + * @iter: NAND I/O iterator + * + * Updates the @iter to point to the next block. + * No OOB handling available. + */ +static inline void nanddev_io_iter_next_block(struct nand_device *nand, + struct nand_io_iter *iter) +{ + nanddev_pos_next_eraseblock(nand, &iter->req.pos); + iter->dataleft -= iter->req.datalen; + iter->req.databuf.in += iter->req.datalen; + iter->req.dataoffs = 0; + iter->req.datalen = min_t(unsigned int, nanddev_eraseblock_size(nand), + iter->dataleft); +} + /** * nand_io_iter_end - Should end iteration or not * @nand: NAND device @@ -997,6 +1056,21 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) +/** + * nand_io_for_each_block - Iterate over all NAND pages contained in an MTD I/O + * request, one block at a time + * @nand: NAND device + * @start: start address to read/write from + * @req: MTD I/O request + * @iter: NAND I/O iterator + * + * Should be used for iterating over blocks that are contained in an MTD request. 
+ */ +#define nanddev_io_for_each_block(nand, type, start, req, iter) \ + for (nanddev_io_block_iter_init(nand, type, start, req, iter); \ + !nanddev_io_iter_end(nand, iter); \ + nanddev_io_iter_next_block(nand, iter)) + bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos); bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); -- cgit v1.2.3 From 631cfdd0520d19b7f4fc13b834fd9c8b46c6dbac Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:07 +0200 Subject: mtd: spi-nand: Add continuous read support A regular page read consist in: - Asking one page of content from the NAND array to be loaded in the chip's SRAM, - Waiting for the operation to be done, - Retrieving the data (I/O phase) from the chip's SRAM. When reading several sequential pages, the above operation is repeated over and over. There is however a way to optimize these accesses, by enabling continuous reads. The feature requires the NAND chip to have a second internal SRAM area plus a bit of additional internal logic to trigger another internal transfer between the NAND array and the second SRAM area while the I/O phase is ongoing. Once the first I/O phase is done, the host can continue reading more data, continuously, as the chip will automatically switch to the second SRAM content (which has already been loaded) and in turns trigger the next load into the first SRAM area again. From an instruction perspective, the command op-codes are different, but the same cycles are required. The only difference is that after a continuous read (which is stopped by a CS deassert), the host must observe a delay of tRST. However, because there is no guarantee in Linux regarding the actual state of the CS pin after a transfer (in order to speed-up the next transfer if targeting the same device), it was necessary to manually end the continuous read with a configuration register write operation. Continuous reads have two main drawbacks: * They only work on full pages (column address ignored) * Only the main data area is pulled, out-of-band bytes are not accessible. Said otherwise, the feature can only be useful with on-die ECC engines. Performance wise, measures have been performed on a Zynq platform using Macronix SPI-NAND controller with a Macronix chip (based on the flash_speed tool modified for testing sequential reads): - 1-1-1 mode: performances improved from +3% (2-pages) up to +10% after a dozen pages. - 1-1-4 mode: performances improved from +15% (2-pages) up to +40% after a dozen pages. This series is based on a previous work from Macronix engineer Jaime Liao. Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-5-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 5c19ead60499..14dce347dc46 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -336,6 +336,7 @@ struct spinand_ondie_ecc_conf { * @op_variants.update_cache: variants of the update-cache operation * @select_target: function used to select a target/die. Required only for * multi-die chips + * @set_cont_read: enable/disable continuous cached reads * * Each SPI NAND manufacturer driver should have a spinand_info table * describing all the chips supported by the driver. 
@@ -354,6 +355,8 @@ struct spinand_info { } op_variants; int (*select_target)(struct spinand_device *spinand, unsigned int target); + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; #define SPINAND_ID(__method, ...) \ @@ -379,6 +382,9 @@ struct spinand_info { #define SPINAND_SELECT_TARGET(__func) \ .select_target = __func, +#define SPINAND_CONT_READ(__set_cont_read) \ + .set_cont_read = __set_cont_read, + #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ __flags, ...) \ { \ @@ -422,6 +428,12 @@ struct spinand_dirmap { * passed in spi_mem_op be DMA-able, so we can't based the bufs on * the stack * @manufacturer: SPI NAND manufacturer information + * @cont_read_possible: Field filled by the core once the whole system + * configuration is known to tell whether continuous reads are + * suitable to use or not in general with this chip/configuration. + * A per-transfer check must of course be done to ensure it is + * actually relevant to enable this feature. + * @set_cont_read: Enable/disable the continuous read feature * @priv: manufacturer private data */ struct spinand_device { @@ -451,6 +463,10 @@ struct spinand_device { u8 *scratchbuf; const struct spinand_manufacturer *manufacturer; void *priv; + + bool cont_read_possible; + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; /** -- cgit v1.2.3 From a06f2e7cc4de104b14971e08d37f9d08c256b053 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 12:14:08 +0200 Subject: mtd: spi-nand: Expose spinand_write_reg_op() This helper function will soon be used from a vendor driver, let's export it through the spinand.h header. No need for any export, as there is currently no reason for any module to need it. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240826101412.20644-6-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 14dce347dc46..8dbf67ca710a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -533,6 +533,7 @@ int spinand_match_and_init(struct spinand_device *spinand, enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); #endif /* __LINUX_MTD_SPINAND_H */ -- cgit v1.2.3 From bb2ac59f8205165fce5aee9135bd1e2ea9b1a74b Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 16 Aug 2024 01:18:23 +0100 Subject: mfd: axp20x: AXP717: Add support for boost regulator The AXP717 also contains a boost regulator, to provide the 5V USB VBUS rail when running on battery. Add the registers to the MFD description to be able to use them from the regulator driver. 
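As a rough sketch only, the regulator driver is expected to reach these registers through the shared regmap. The enable bit below is a placeholder, since this patch only adds the register offsets, not the bit layout:

#define EXAMPLE_AXP717_BOOST_EN		BIT(4)	/* hypothetical bit position */

static int example_boost_enable(struct axp20x_dev *axp20x, bool enable)
{
	return regmap_update_bits(axp20x->regmap, AXP717_MODULE_EN_CONTROL_2,
				  EXAMPLE_AXP717_BOOST_EN,
				  enable ? EXAMPLE_AXP717_BOOST_EN : 0);
}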
Signed-off-by: Andre Przywara Reviewed-by: John Watts Acked-by: Lee Jones Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240816001824.6028-3-andre.przywara@arm.com Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 8c0a33a2e9ce..3758f986491c 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -115,6 +115,8 @@ enum axp20x_variants { #define AXP313A_IRQ_STATE 0x21 #define AXP717_ON_INDICATE 0x00 +#define AXP717_MODULE_EN_CONTROL_2 0x19 +#define AXP717_BOOST_CONTROL 0x1e #define AXP717_IRQ0_EN 0x40 #define AXP717_IRQ1_EN 0x41 #define AXP717_IRQ2_EN 0x42 -- cgit v1.2.3 From 22dfe2ea1d63f15d8b9661e7690ac0a03159db6a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 16 Aug 2024 01:18:24 +0100 Subject: regulator: axp20x: AXP717: Add boost regulator The AXP717 also contains an adjustable boost regulator, to provide the 5V USB VBUS rail when running on battery. Add the regulator description that states the voltage range this regulator can cover. Signed-off-by: Andre Przywara Reviewed-by: John Watts Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240816001824.6028-4-andre.przywara@arm.com Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 3758f986491c..e0cd66bd3b6d 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -486,6 +486,7 @@ enum { AXP717_CLDO3, AXP717_CLDO4, AXP717_CPUSLDO, + AXP717_BOOST, AXP717_REG_ID_MAX, }; -- cgit v1.2.3 From 663b0f1e141dc60ce6c09ae6afc5f213b22d13ca Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 16 Feb 2024 11:00:10 -0500 Subject: drm/amdkfd: Document and define SVM events message macro Document how to use SMI system management interface to enable and receive SVM events. Document SVM event triggers. Define SVM events message string format macro that could be used by user mode for sscanf to parse the event. Add it to uAPI header file to make it obvious that is changing uAPI in future. No functional changes. 
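As a user-space sketch (the function name and buffer handling are illustrative, not part of the uAPI), an application that has read one event line and split off the leading event id can pass the remaining payload straight to sscanf() through these macros:

#include <stdio.h>
#include <linux/kfd_ioctl.h>

/* 'msg' points at the payload that follows the leading event id. */
static void example_parse_migrate_end(const char *msg)
{
	long long ns;
	unsigned long start, size;
	unsigned int from, to;
	int pid, trigger;

	if (sscanf(msg, KFD_EVENT_FMT_MIGRATE_END(&ns, &pid, &start, &size,
						  &from, &to, &trigger)) == 7)
		printf("migrate done: pid %d, %lu pages, node %x -> %x\n",
		       pid, size, from, to);
}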
Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 100 +++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 71a7ce5f2d4c..717307d6b5b7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -540,26 +540,29 @@ enum kfd_smi_event { KFD_SMI_EVENT_ALL_PROCESS = 64 }; +/* The reason of the page migration event */ enum KFD_MIGRATE_TRIGGERS { - KFD_MIGRATE_TRIGGER_PREFETCH, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - KFD_MIGRATE_TRIGGER_TTM_EVICTION + KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */ + KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */ }; +/* The reason of user queue evition event */ enum KFD_QUEUE_EVICTION_TRIGGERS { - KFD_QUEUE_EVICTION_TRIGGER_SVM, - KFD_QUEUE_EVICTION_TRIGGER_USERPTR, - KFD_QUEUE_EVICTION_TRIGGER_TTM, - KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, - KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, - KFD_QUEUE_EVICTION_CRIU_RESTORE + KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */ + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */ + KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */ + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */ + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */ + KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */ }; +/* The reason of unmap buffer from GPU event */ enum KFD_SVM_UNMAP_TRIGGERS { - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, - KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */ + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */ + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */ }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) @@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/* + * SVM event tracing via SMI system management interface + * + * Open event file descriptor + * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return a anonymous file + * descriptor to receive SMI events. + * If calling with sudo permission, then file descriptor can be used to receive + * SVM events from all processes, otherwise, to only receive SVM events of same + * process. + * + * To enable the SVM event + * Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap + * mask to start record the event to the kfifo, use bitmap mask combination + * for multiple events. New event mask will overwrite the previous event mask. + * KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo + * permisson to receive SVM events from all process. + * + * To receive the event + * Application can poll file descriptor to wait for the events, then read event + * from the file into a buffer. Each event is one line string message, starting + * with the event id, then the event specific information. + * + * To decode event information + * The following event format string macro can be used with sscanf to decode + * the specific event information. + * event triggers: the reason to generate the event, defined as enum for unmap, + * eviction and migrate events. 
+ * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory. + * addr: user mode address, in pages + * size: in pages + * pid: the process ID to generate the event + * ns: timestamp in nanosecond-resolution, starts at system boot time but + * stops during suspend + * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update + * rw: 'W' for write page fault, 'R' for read page fault + * rescheduled: 'R' if the queue restore failed and rescheduled to try again + */ +#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ + "%x %s\n", (reset_seq_num), (reset_cause) + +#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\ + "%llx:%llx\n", (bitmask), (counter) + +#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\ + "%x:%s\n", (pid), (task_name) + +#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw) + +#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update) + +#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\ + preferred_loc, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) + +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger) + +#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ + "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) + +#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\ + "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled) + +#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\ + "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ + (node), (unmap_trigger) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 78f76b09c915d7281317d8e082c4c02f325a0366 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Jul 2024 17:48:16 +0900 Subject: ata: libata: Move sata_std_hardreset() definition to libata-sata.c Unlike ata_std_prereset() and ata_std_postreset(), the function sata_std_hardreset() applies only to SATA devices, as its name implies. So move its definition to libata-sata.c. Together with this, also move the definition of sata_port_ops to libata-sata.c, where it belongs. 
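As an illustration of why the helper sits naturally next to sata_port_ops, a typical SATA LLDD keeps wiring it up unchanged (example_port_ops is a hypothetical driver structure, shown only as a sketch):

static struct ata_port_operations example_port_ops = {
	.inherits	= &sata_port_ops,
	.hardreset	= sata_std_hardreset,	/* now defined in libata-sata.c */
};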
Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 6552e90753ae..d52ae7723c05 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1104,8 +1104,6 @@ static inline bool ata_port_is_frozen(const struct ata_port *ap) extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); -extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); extern struct ata_host *ata_host_alloc(struct device *dev, int n_ports); @@ -1229,6 +1227,8 @@ extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); +int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); extern int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -1256,6 +1256,11 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } +static inline int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + return -EOPNOTSUPP; +} static inline int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, -- cgit v1.2.3 From 10e807637f288f9963340a30ae8da9bb06beca3c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 Jun 2024 19:11:31 +0900 Subject: ata: libata: Rename ata_eh_read_sense_success_ncq_log() The function ata_eh_read_sense_success_ncq_log() does more that just reading the sense data for successful NCQ commands log page as it also sets the sense data for all commands listed in the log page. Rename this function to ata_eh_get_ncq_success_sense() to better describe what the function does. Furthermore, since this function is only called from ata_eh_get_success_sense() in libata-eh.c, there is no need to export it and its declaration can be moved to drivers/ata/libata.h. To be consistent with this change, the function ata_eh_read_sense_success_non_ncq() is also renamed to ata_eh_get_non_ncq_success_sense(). 
Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d52ae7723c05..55a6b57742bc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1234,7 +1234,6 @@ extern int sata_link_hardreset(struct ata_link *link, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); -extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned int * @@ -1277,10 +1276,6 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } -static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) -{ - return -EOPNOTSUPP; -} static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, -- cgit v1.2.3 From da65bbdd3bc1e8d2193e01167a413d90d9988c04 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 28 Aug 2024 11:07:43 +0900 Subject: ata: libata: Move sector_buf from struct ata_port to struct ata_device The 512B buffer sector_buf field of struct ata_port is used for scanning devices as well as during error recovery with ata EH. This buffer is thus useless if a port does not have a device connected to it. And also given that commands using this buffer are issued to devices, and not to ports, move this buffer definition from struct ata_port to struct ata_device. This change slightly increases system memory usage for systems using a port-multiplier as in that case we do not need a per-device buffer for scanning devices (PMP does not allow parallel scanning) nor for EH (as when entering EH we are guaranteed that all commands to all devices connected to the PMP have completed or have been aborted). However, this change reduces memory usage on systems that have many ports with only few devices rives connected, which is a much more common use case than the PMP use case. Suggested-by: Niklas Cassel Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel --- include/linux/libata.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 55a6b57742bc..aac38dcd2230 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -769,6 +769,9 @@ struct ata_device { int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ struct ata_ering ering; + + /* For EH */ + u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are @@ -916,7 +919,6 @@ struct ata_port { #endif /* owned by EH */ u8 *ncq_sense_buf; - u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* The following initializer overrides a method to NULL whether one of -- cgit v1.2.3 From 602bcf212637633d537ee74bf39c6bc5722efb9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Jul 2024 17:55:31 +0900 Subject: ata: libata: Improve CDL resource management The ncq_sense_buf buffer field of struct ata_port is allocated and used only for devices that support the Command Duration Limits (CDL) feature. 
However, the cdl buffer of struct ata_device, which is used to cache the command duration limits log page for devices supporting CDL is always allocated as part of struct ata_device, which is wasteful of memory for devices that do not support this feature. Clean this up by defining both buffers as part of the new ata_cdl structure and allocating this structure only for devices that support the CDL feature. This new structure is attached to struct ata_device using the cdl pointer. The functions ata_dev_init_cdl_resources() and ata_dev_cleanup_cdl_resources() are defined to manage this new structure allocation, initialization and freeing when a port is removed or a device disabled. ata_dev_init_cdl_resources() is called from ata_dev_config_cdl() only for devices that support CDL. ata_dev_cleanup_cdl_resources() is called from ata_dev_free_resources() to free the ata_cdl structure when a device is being disabled by EH. Note that the name of the former cdl log buffer of struct ata_device is changed to desc_log_buf to make it clearer that it is a buffer for the limit descriptors log page. This change reduces the size of struct ata_device, thus reducing memory usage for ATA devices that do not support the CDL feature. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel --- include/linux/libata.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index aac38dcd2230..9b4a6ff03235 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -700,6 +700,21 @@ struct ata_cpr_log { struct ata_cpr cpr[] __counted_by(nr_cpr); }; +struct ata_cdl { + /* + * Buffer to cache the CDL log page 18h (command duration descriptors) + * for SCSI-ATA translation. + */ + u8 desc_log_buf[ATA_LOG_CDL_SIZE]; + + /* + * Buffer to handle reading the sense data for successful NCQ Commands + * log page for commands using a CDL with one of the limits policy set + * to 0xD (successful completion with sense data available bit set). + */ + u8 ncq_sense_log_buf[ATA_LOG_SENSE_NCQ_SIZE]; +}; + struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ @@ -762,8 +777,8 @@ struct ata_device { /* Concurrent positioning ranges */ struct ata_cpr_log *cpr_log; - /* Command Duration Limits log support */ - u8 cdl[ATA_LOG_CDL_SIZE]; + /* Command Duration Limits support */ + struct ata_cdl *cdl; /* error history */ int spdn_cnt; @@ -917,8 +932,6 @@ struct ata_port { #ifdef CONFIG_ATA_ACPI struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif - /* owned by EH */ - u8 *ncq_sense_buf; }; /* The following initializer overrides a method to NULL whether one of -- cgit v1.2.3 From b12891c7b6d2c29a07b2da5589ee469d7bf37254 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 6 Sep 2024 11:34:16 +0200 Subject: ALSA: IEC958 definition for consumer status channel update Add 128kHz, 352.4kHz, 384kHz and 705.6kHz. These definitions have been found working on eARC using a Murideo Seven Generator. 
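A minimal sketch of how a driver would program one of the new rates; because 128 kHz and 705.6 kHz encode into bit 7 as well, the whole (widened) IEC958_AES3_CON_FS mask must be cleared before OR-ing in the new code:

static void example_set_fs(u8 *status, unsigned int rate)
{
	status[3] &= ~IEC958_AES3_CON_FS;	/* clears bit 7 and bits 3..0 */

	switch (rate) {
	case 128000:
		status[3] |= IEC958_AES3_CON_FS_128000;
		break;
	case 352400:
		status[3] |= IEC958_AES3_CON_FS_352400;
		break;
	case 705600:
		status[3] |= IEC958_AES3_CON_FS_705600;
		break;
	default:
		status[3] |= IEC958_AES3_CON_FS_NOTID;
		break;
	}
}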
Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20240906093422.2976550-1-jbrunet@baylibre.com Signed-off-by: Takashi Iwai --- include/sound/asoundef.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/asoundef.h b/include/sound/asoundef.h index 9fdeac19dadb..09b2c3dffb30 100644 --- a/include/sound/asoundef.h +++ b/include/sound/asoundef.h @@ -110,18 +110,22 @@ #define IEC958_AES2_CON_SOURCE_UNSPEC (0<<0) /* unspecified */ #define IEC958_AES2_CON_CHANNEL (15<<4) /* mask - channel number */ #define IEC958_AES2_CON_CHANNEL_UNSPEC (0<<4) /* unspecified */ -#define IEC958_AES3_CON_FS (15<<0) /* mask - sample frequency */ +#define IEC958_AES3_CON_FS ((1<<7) | (15<<0)) /* mask - sample frequency */ #define IEC958_AES3_CON_FS_44100 (0<<0) /* 44.1kHz */ #define IEC958_AES3_CON_FS_NOTID (1<<0) /* non indicated */ #define IEC958_AES3_CON_FS_48000 (2<<0) /* 48kHz */ #define IEC958_AES3_CON_FS_32000 (3<<0) /* 32kHz */ #define IEC958_AES3_CON_FS_22050 (4<<0) /* 22.05kHz */ +#define IEC958_AES3_CON_FS_384000 (5<<0) /* 384kHz */ #define IEC958_AES3_CON_FS_24000 (6<<0) /* 24kHz */ #define IEC958_AES3_CON_FS_88200 (8<<0) /* 88.2kHz */ #define IEC958_AES3_CON_FS_768000 (9<<0) /* 768kHz */ #define IEC958_AES3_CON_FS_96000 (10<<0) /* 96kHz */ #define IEC958_AES3_CON_FS_176400 (12<<0) /* 176.4kHz */ +#define IEC958_AES3_CON_FS_352400 (13<<0) /* 352.4kHz */ #define IEC958_AES3_CON_FS_192000 (14<<0) /* 192kHz */ +#define IEC958_AES3_CON_FS_128000 ((1<<7) | (11<<0)) /* 128kHz */ +#define IEC958_AES3_CON_FS_705600 ((1<<7) | (13<<0)) /* 705.6kHz */ #define IEC958_AES3_CON_CLOCK (3<<4) /* mask - clock accuracy */ #define IEC958_AES3_CON_CLOCK_1000PPM (0<<4) /* 1000 ppm */ #define IEC958_AES3_CON_CLOCK_50PPM (1<<4) /* 50 ppm */ -- cgit v1.2.3 From 446216bd8e5d4a3ffc9738f6adffc98fbfdcc582 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 8 Sep 2024 13:05:48 +0900 Subject: firewire: core: expose kernel API to schedule work item to process isochronous context In packet-per-buffer mode for isochronous context of 1394 OHCI, software can schedule hardIRQ to the buffer in which the content of isochronous packet is processed. The actual behaviour is different between isochronous receive (IR) and transmit (IT) contexts in respect to isochronous cycle in which the hardIRQ occurs. In IR context, the hardIRQ occurs when the buffer is filled actually by the content of received packet. If there are any isochronous cycles in which the packet transmission is skipped, it is postponed to generate the hardIRQ in respect to the isochronous cycle. In IT context, software can schedule the content of packet every isochronous cycle including skipping, therefore the hardIRQ occurs in the isochronous cycle to which the software scheduled. ALSA firewire stack uses the packet-per-buffer mode for both IR/IT contexts. To process time stamp per packet (or per sample in some cases) steadily for media clock recovery against unexpected transmission skips, it uses an IT context to operate all of isochronous contexts by calls of fw_iso_context_flush_completions() in the bottom-half of hardIRQ for the IT context. Although it looks well to handle all of isochronous contexts in a single bottom-half context, it relatively takes longer time. In the future code integration (not yet), it is possible to apply parallelism method to process these context. In the case, it is useful to allow unit drivers to schedule work items to process these isochronous contexts. 
As a preparation, this commit exposes fw_iso_context_schedule_flush_completions() as a kernel API available by unit drivers. It is renamed from fw_iso_context_queue_work() since it is a counter part of fw_iso_context_flush_completions(). Link: https://lore.kernel.org/r/20240908040549.75304-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 72f497b61739..f815d12deda0 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -531,6 +531,23 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, unsigned long payload); void fw_iso_context_queue_flush(struct fw_iso_context *ctx); int fw_iso_context_flush_completions(struct fw_iso_context *ctx); + +/** + * fw_iso_context_schedule_flush_completions() - schedule work item to process isochronous context. + * @ctx: the isochronous context + * + * Schedule a work item on workqueue to process the isochronous context. The registered callback + * function is called in the worker if some packets have been already transferred since the last + * time. If it is required to process the context in the current context, + * fw_iso_context_flush_completions() is available instead. + * + * Context: Any context. + */ +static inline void fw_iso_context_schedule_flush_completions(struct fw_iso_context *ctx) +{ + queue_work(ctx->card->isoc_wq, &ctx->work); +} + int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags); int fw_iso_context_stop(struct fw_iso_context *ctx); -- cgit v1.2.3 From 1d07085402d122f223bda3f8b72bea37a46ee0c9 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 7 Sep 2024 16:27:20 +0800 Subject: smp: Mark smp_prepare_boot_cpu() __init smp_prepare_boot_cpu() is only called during boot, hence mark it as __init. Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Reviewed-by: Huacai Chen Link: https://lore.kernel.org/all/20240907082720.452148-1-maobibo@loongson.cn --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index fcd61dfe2af3..6a0813c905d0 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -109,7 +109,7 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, * Architecture specific boot CPU setup. Defined as empty weak function in * init/main.c. Architectures can override it. */ -void smp_prepare_boot_cpu(void); +void __init smp_prepare_boot_cpu(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From c259acab839e57eab0318f32da4ae803a8d59397 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 4 Sep 2024 07:13:05 -0700 Subject: ptp/ioctl: support MONOTONIC{,_RAW} timestamps for PTP_SYS_OFFSET_EXTENDED The ability to read the PHC (Physical Hardware Clock) alongside multiple system clocks is currently dependent on the specific hardware architecture. This limitation restricts the use of PTP_SYS_OFFSET_PRECISE to certain hardware configurations. The generic soultion which would work across all architectures is to read the PHC along with the latency to perform PHC-read as offered by PTP_SYS_OFFSET_EXTENDED which provides pre and post timestamps. However, these timestamps are currently limited to the CLOCK_REALTIME timebase. Since CLOCK_REALTIME is affected by NTP (or similar time synchronization services), it can experience significant jumps forward or backward. 
This hinders the precise latency measurements that PTP_SYS_OFFSET_EXTENDED is designed to provide. This problem could be addressed by supporting MONOTONIC_RAW timestamps within PTP_SYS_OFFSET_EXTENDED. Unlike CLOCK_REALTIME or CLOCK_MONOTONIC, the MONOTONIC_RAW timebase is unaffected by NTP adjustments. This enhancement can be implemented by utilizing one of the three reserved words within the PTP_SYS_OFFSET_EXTENDED struct to pass the clock-id for timestamps. The current behavior aligns with clock-id for CLOCK_REALTIME timebase (value of 0), ensuring backward compatibility of the UAPI. Signed-off-by: Mahesh Bandewar Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 36 ++++++++++++++++++++++++++++++++---- include/uapi/linux/ptp_clock.h | 24 ++++++++++++++++++------ 2 files changed, 50 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6e4b8206c7d0..c892d22ce0a7 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -47,10 +47,12 @@ struct system_device_crosststamp; * struct ptp_system_timestamp - system time corresponding to a PHC timestamp * @pre_ts: system timestamp before capturing PHC * @post_ts: system timestamp after capturing PHC + * @clockid: clock-base used for capturing the system timestamps */ struct ptp_system_timestamp { struct timespec64 pre_ts; struct timespec64 post_ts; + clockid_t clockid; }; /** @@ -457,14 +459,40 @@ static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->pre_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->pre_ts); + break; + default: + break; + } + } } static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->post_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->post_ts); + break; + default: + break; + } + } } #endif diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 053b40d642de..18eefa6d93d6 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -155,13 +155,25 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +/* + * ptp_sys_offset_extended - data structure for IOCTL operation + * PTP_SYS_OFFSET_EXTENDED + * + * @n_samples: Desired number of measurements. + * @clockid: clockid of a clock-base used for pre/post timestamps. + * @rsv: Reserved for future use. + * @ts: Array of samples in the form [pre-TS, PHC, post-TS]. The + * kernel provides @n_samples. + * + * Starting from kernel 6.12 and onwards, the first word of the reserved-field + * is used for @clockid. That's backward compatible since previous kernel + * expect all three reserved words (@rsv[3]) to be 0 while the clockid (first + * word in the new structure) for CLOCK_REALTIME is '0'. + */ struct ptp_sys_offset_extended { - unsigned int n_samples; /* Desired number of measurements. */ - unsigned int rsv[3]; /* Reserved for future use. 
*/ - /* - * Array of [system, phc, system] time stamps. The kernel will provide - * 3*n_samples time stamps. - */ + unsigned int n_samples; + __kernel_clockid_t clockid; + unsigned int rsv[2]; struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; }; -- cgit v1.2.3 From 1fcb932c8b5ce86219d7dedcd63659351a43291c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Jul 2024 00:56:40 +0200 Subject: rcu/nocb: Simplify (de-)offloading state machine Now that the (de-)offloading process can only apply to offline CPUs, there is no more concurrency between rcu_core and nocb kthreads. Also the mutation now happens on empty queues. Therefore the state machine can be reduced to a single bit called SEGCBLIST_OFFLOADED. Simplify the transition as follows: * Upon offloading: queue the rdp to be added to the rcuog list and wait for the rcuog kthread to set the SEGCBLIST_OFFLOADED bit. Unpark rcuo kthread. * Upon de-offloading: Park rcuo kthread. Queue the rdp to be removed from the rcuog list and wait for the rcuog kthread to clear the SEGCBLIST_OFFLOADED bit. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay --- include/linux/rcu_segcblist.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 1ef1bb54853d..2fdc2208f1ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -185,9 +185,7 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_LOCKING BIT(1) -#define SEGCBLIST_KTHREAD_GP BIT(2) -#define SEGCBLIST_OFFLOADED BIT(3) +#define SEGCBLIST_OFFLOADED BIT(1) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From bd7c8ff9fef4b21a97f9b30a7364845ee6eaaf23 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 4 Sep 2024 15:04:53 +0200 Subject: treewide: Fix wrong singular form of jiffies in comments There are several comments all over the place, which uses a wrong singular form of jiffies. Replace 'jiffie' by 'jiffy'. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: Geert Uytterhoeven # m68k Link: https://lore.kernel.org/all/20240904-devel-anna-maria-b4-timers-flseep-v1-3-e98760256370@linutronix.de --- include/linux/jiffies.h | 2 +- include/linux/timekeeper_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d9f1435a5a13..1220f0fbe5bf 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -418,7 +418,7 @@ extern unsigned long preset_lpj; #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) /* - * The maximum jiffie value is (MAX_INT >> 1). Here we translate that + * The maximum jiffy value is (MAX_INT >> 1). Here we translate that * into seconds. The 64-bit case will overflow if we are not careful, * so use the messy SH_DIV macro to do it. Still all constants. 
*/ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 84ff2844df2a..902c20ef495a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -73,7 +73,7 @@ struct tk_read_base { * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what - * we need to add to xtime (or xtime corrected for sub jiffie times) + * we need to add to xtime (or xtime corrected for sub jiffy times) * to get to monotonic time. Monotonic is pegged at zero at system * boot time, so wall_to_monotonic will be negative, however, we will * ALWAYS keep the tv_nsec part positive so we can use the usual -- cgit v1.2.3 From 4e1b5586051f0e9aef9b0ca375ef2cd1a8987e0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Sep 2024 11:03:08 +0200 Subject: wifi: cfg80211: fix kernel-doc for per-link data There cannot be brackets in kernel-doc, remove them. Reported-by: Stephen Rothwell Fixes: 62c16f219a73 ("wifi: cfg80211: move DFS related members to links[] in wireless_dev") Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index efdea667cb92..69ec1eb41a09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6217,11 +6217,11 @@ enum ieee80211_ap_reg_power { * unprotected beacon report * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr * @ap and @client for each link - * @links[].cac_started: true if DFS channel availability check has been + * @links.cac_started: true if DFS channel availability check has been * started - * @links[].cac_start_time: timestamp (jiffies) when the dfs state was + * @links.cac_start_time: timestamp (jiffies) when the dfs state was * entered. - * @links[].cac_time_ms: CAC time in ms + * @links.cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { -- cgit v1.2.3 From ca229bdbef29be207c8666f106acc3bf50736c05 Mon Sep 17 00:00:00 2001 From: Cheng Ming Lin Date: Mon, 9 Sep 2024 17:26:42 +0800 Subject: mtd: spinand: Add support for setting plane select bits Add two flags for inserting the Plane Select bit into the column address during the write_to_cache and the read_from_cache operation. Add the SPINAND_HAS_PROG_PLANE_SELECT_BIT flag for serial NAND flash that require inserting the Plane Select bit into the column address during the write_to_cache operation. Add the SPINAND_HAS_READ_PLANE_SELECT_BIT flag for serial NAND flash that require inserting the Plane Select bit into the column address during the read_from_cache operation. 
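A sketch of how the core is expected to consume the two flags when building the column address; the actual plane-select bit position is chip specific and not defined by this patch, so plane_bit below is purely illustrative:

static u16 example_adjust_column(struct spinand_device *spinand,
				 const struct nand_page_io_req *req,
				 u16 column, u16 plane_bit)
{
	bool writing = req->type == NAND_PAGE_WRITE;

	if ((writing && (spinand->flags & SPINAND_HAS_PROG_PLANE_SELECT_BIT)) ||
	    (!writing && (spinand->flags & SPINAND_HAS_READ_PLANE_SELECT_BIT)))
		column |= plane_bit;	/* chip-specific plane-select bit */

	return column;
}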
Signed-off-by: Cheng Ming Lin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240909092643.2434479-2-linchengming884@gmail.com --- include/linux/mtd/spinand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 8dbf67ca710a..702e5fb13dae 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -312,6 +312,8 @@ struct spinand_ecc_info { #define SPINAND_HAS_QE_BIT BIT(0) #define SPINAND_HAS_CR_FEAT_BIT BIT(1) +#define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) +#define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure -- cgit v1.2.3 From d688d65a847ff910146eff51e70f4b7049895eb5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:49 +0200 Subject: fs: add generic_llseek_cookie() This is similar to generic_file_llseek() but allows the caller to specify a cookie that will be updated to indicate that a seek happened. Caller's requiring that information in their readdir implementations can use that. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-8-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 58c91a52cad1..3e6b3c1afb31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3202,6 +3202,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); -- cgit v1.2.3 From 88d2ae0e6eb84e1ed58f339c6a0de16c24fa2a60 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 Aug 2024 11:18:21 -0400 Subject: inode: make __iget() a static inline bcachefs is switching to an rhashtable for vfs inodes instead of the standard inode.c hashtable, so we need this exported, or - a static inline makes more sense for a single atomic_inc(). 
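Usage stays exactly as with the out-of-line version; a minimal sketch:

/* Grab an extra reference; inode->i_lock must be held, as documented. */
static struct inode *example_grab_inode(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__iget(inode);
	spin_unlock(&inode->i_lock);

	return inode;	/* the caller eventually drops the reference with iput() */
}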
Signed-off-by: Kent Overstreet --- include/linux/fs.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..8fc4bad3b6ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3094,7 +3094,14 @@ static inline bool is_zero_ino(ino_t ino) return (u32)ino == 0; } -extern void __iget(struct inode * inode); +/* + * inode->i_lock must be held + */ +static inline void __iget(struct inode *inode) +{ + atomic_inc(&inode->i_count); +} + extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); -- cgit v1.2.3 From f6594633817374a64a5fb31007bd810cad1425ff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 19:00:33 -0400 Subject: lib/generic-radix-tree.c: genradix_ptr_inlined() Provide an inlined fast path Signed-off-by: Kent Overstreet --- include/linux/generic-radix-tree.h | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index f3512fddf3d7..8a3e1e886d1c 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -48,6 +48,49 @@ struct genradix_root; #define GENRADIX_NODE_SHIFT 9 #define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) +#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +static inline int genradix_depth_shift(unsigned depth) +{ + return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +static inline unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[GENRADIX_NODE_SIZE]; + }; +}; + struct __genradix { struct genradix_root *root; }; @@ -128,6 +171,30 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) +static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + unsigned shift = genradix_depth_shift(level); + + if (unlikely(ilog2(offset) >= genradix_depth_shift(level))) + return NULL; + + while (n && shift > GENRADIX_NODE_SHIFT) { + shift -= GENRADIX_ARY_SHIFT; + n = n->children[offset >> shift]; + offset &= (1UL << shift) - 1; + } + + return n ? 
&n->data[offset] : NULL; +} + +#define genradix_ptr_inlined(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + void *__genradix_ptr(struct __genradix *, size_t); /** @@ -144,6 +211,14 @@ void *__genradix_ptr(struct __genradix *, size_t); void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); +#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _gfp))) + /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it * if necessary -- cgit v1.2.3 From b3f9da79e7783ca4f1c1d4214af976c548629c1d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 23:14:44 -0400 Subject: lib/generic-radix-tree.c: add preallocation Signed-off-by: Kent Overstreet --- include/linux/generic-radix-tree.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 8a3e1e886d1c..5b51c3d582d6 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -41,6 +41,7 @@ #include #include #include +#include #include struct genradix_root; @@ -81,6 +82,10 @@ static inline struct genradix_node *genradix_root_to_node(struct genradix_root * return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); } +struct __genradix { + struct genradix_root *root; +}; + struct genradix_node { union { /* Interior node: */ @@ -91,9 +96,15 @@ struct genradix_node { }; }; -struct __genradix { - struct genradix_root *root; -}; +static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) +{ + return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); +} + +static inline void genradix_free_node(struct genradix_node *node) +{ + kfree(node); +} /* * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: @@ -209,7 +220,8 @@ void *__genradix_ptr(struct __genradix *, size_t); __genradix_ptr(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) -void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); +void *__genradix_ptr_alloc(struct __genradix *, size_t, + struct genradix_node **, gfp_t); #define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ (__genradix_cast(_radix) \ @@ -217,7 +229,15 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); __genradix_idx_to_offset(_radix, _idx)) ?: \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ - _gfp))) + NULL, _gfp))) + +#define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp))) /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it @@ -232,7 +252,13 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ - _gfp)) + NULL, _gfp)) + +#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp)) struct 
genradix_iter { size_t offset; -- cgit v1.2.3 From f877f1d81b2ffcac341ff62ae076d7ad5ba83f46 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 9 Sep 2024 23:00:18 +0900 Subject: firewire: core: use mutex to coordinate concurrent calls to flush completions In current implementation, test_and_set_bit_lock() is used to mediate concurrent calls of ohci_flush_iso_completions(). However, the ad-hoc usage of atomic operations is not preferable. This commit uses mutex_trylock() as the similar operations. The core function is responsible for the mediation, instead of 1394 OHCI driver. Link: https://lore.kernel.org/r/20240909140018.65289-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f815d12deda0..19e8c5f9537c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -512,6 +512,7 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; struct work_struct work; + struct mutex flushing_completions_mutex; int type; int channel; int speed; -- cgit v1.2.3 From 34c626c3004a73ee7da5072be48ddff9c2902902 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 16 Jan 2023 01:16:43 +0200 Subject: net/mlx5: Added missing mlx5_ifc definition for HW Steering Add mlx5_ifc definitions that are required for HWS support. Note that due to change in the mlx5_ifc_flow_table_context_bits structure that now includes both SWS and HWS bits in a union, this patch also includes small change in one of SWS files that was required for compilation. Reviewed-by: Hamdan Agbariya Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 189 +++++++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 234ad6f16e92..b6f8e3834bd3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -80,23 +80,15 @@ enum { enum { MLX5_OBJ_TYPE_SW_ICM = 0x0008, - MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, -}; - -enum { - MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), - MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), - MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), - MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = - (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), - MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), -}; - -enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, + MLX5_OBJ_TYPE_STC = 0x0040, + MLX5_OBJ_TYPE_RTC = 0x0041, + MLX5_OBJ_TYPE_STE = 0x0042, + MLX5_OBJ_TYPE_MODIFY_HDR_PATTERN = 0x0043, MLX5_OBJ_TYPE_PAGE_TRACK = 0x46, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, @@ -112,6 +104,16 @@ enum { MLX5_OBJ_TYPE_RQT = 0xff0e, MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f, MLX5_OBJ_TYPE_CQ = 0xff10, + MLX5_OBJ_TYPE_FT_ALIAS = 0xff15, +}; + +enum { + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), + MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = + (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), }; enum { @@ -313,6 +315,7 @@ 
enum { MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, + MLX5_CMD_OP_GENERATE_WQE = 0xb17, MLX5_CMD_OP_MAX }; @@ -485,7 +488,13 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_66[0x2]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; - u8 reserved_at_6a[0xe]; + u8 reparse[0x1]; + u8 reserved_at_6b[0x1]; + u8 cross_vhca_object[0x1]; + u8 reformat_l2_to_l3_audp_tunnel[0x1]; + u8 reformat_l3_audp_tunnel_to_l2[0x1]; + u8 ignore_flow_level_rtc_valid[0x1]; + u8 reserved_at_70[0x8]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; @@ -522,7 +531,15 @@ struct mlx5_ifc_ipv6_layout_bits { u8 ipv6[16][0x8]; }; +struct mlx5_ifc_ipv6_simple_layout_bits { + u8 ipv6_127_96[0x20]; + u8 ipv6_95_64[0x20]; + u8 ipv6_63_32[0x20]; + u8 ipv6_31_0[0x20]; +}; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_simple_layout_bits ipv6_simple_layout; struct mlx5_ifc_ipv6_layout_bits ipv6_layout; struct mlx5_ifc_ipv4_layout_bits ipv4_layout; u8 reserved_at_0[0x80]; @@ -911,7 +928,9 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_8[0x5]; u8 fdb_uplink_hairpin[0x1]; u8 fdb_multi_path_any_table_limit_regc[0x1]; - u8 reserved_at_f[0x3]; + u8 reserved_at_f[0x1]; + u8 fdb_dynamic_tunnel[0x1]; + u8 reserved_at_11[0x1]; u8 fdb_multi_path_any_table[0x1]; u8 reserved_at_13[0x2]; u8 fdb_modify_header_fwd_to_table[0x1]; @@ -950,6 +969,73 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_1900[0x6700]; }; +struct mlx5_ifc_wqe_based_flow_table_cap_bits { + u8 reserved_at_0[0x3]; + u8 log_max_num_ste[0x5]; + u8 reserved_at_8[0x3]; + u8 log_max_num_stc[0x5]; + u8 reserved_at_10[0x3]; + u8 log_max_num_rtc[0x5]; + u8 reserved_at_18[0x3]; + u8 log_max_num_header_modify_pattern[0x5]; + + u8 rtc_hash_split_table[0x1]; + u8 rtc_linear_lookup_table[0x1]; + u8 reserved_at_22[0x1]; + u8 stc_alloc_log_granularity[0x5]; + u8 reserved_at_28[0x3]; + u8 stc_alloc_log_max[0x5]; + u8 reserved_at_30[0x3]; + u8 ste_alloc_log_granularity[0x5]; + u8 reserved_at_38[0x3]; + u8 ste_alloc_log_max[0x5]; + + u8 reserved_at_40[0xb]; + u8 rtc_reparse_mode[0x5]; + u8 reserved_at_50[0x3]; + u8 rtc_index_mode[0x5]; + u8 reserved_at_58[0x3]; + u8 rtc_log_depth_max[0x5]; + + u8 reserved_at_60[0x10]; + u8 ste_format[0x10]; + + u8 stc_action_type[0x80]; + + u8 header_insert_type[0x10]; + u8 header_remove_type[0x10]; + + u8 trivial_match_definer[0x20]; + + u8 reserved_at_140[0x1b]; + u8 rtc_max_num_hash_definer_gen_wqe[0x5]; + + u8 reserved_at_160[0x18]; + u8 access_index_mode[0x8]; + + u8 reserved_at_180[0x10]; + u8 ste_format_gen_wqe[0x10]; + + u8 linear_match_definer_reg_c3[0x20]; + + u8 fdb_jump_to_tir_stc[0x1]; + u8 reserved_at_1c1[0x1f]; +}; + +struct mlx5_ifc_esw_cap_bits { + u8 reserved_at_0[0x1d]; + u8 merged_eswitch[0x1]; + u8 reserved_at_1e[0x2]; + + u8 reserved_at_20[0x40]; + + u8 esw_manager_vport_number_valid[0x1]; + u8 reserved_at_61[0xf]; + u8 esw_manager_vport_number[0x10]; + + u8 reserved_at_80[0x780]; +}; + enum { MLX5_COUNTER_SOURCE_ESWITCH = 0x0, MLX5_COUNTER_FLOW_ESWITCH = 0x1, @@ -1443,9 +1529,13 @@ enum { }; enum { + MLX5_FLEX_IPV4_OVER_VXLAN_ENABLED = 1 << 0, + MLX5_FLEX_IPV6_OVER_VXLAN_ENABLED = 1 << 1, + MLX5_FLEX_IPV6_OVER_IP_ENABLED = 1 << 2, MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, + MLX5_FLEX_P_BIT_VXLAN_GPE_ENABLED = 1 << 6, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, 
MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, @@ -1650,7 +1740,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pci_sync_for_fw_update_event[0x1]; u8 reserved_at_1f2[0x6]; u8 init2_lag_tx_port_affinity[0x1]; - u8 reserved_at_1fa[0x3]; + u8 reserved_at_1fa[0x2]; + u8 wqe_based_flow_table_update_cap[0x1]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; @@ -1959,7 +2050,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_760[0x3]; u8 log_max_num_header_modify_argument[0x5]; - u8 reserved_at_768[0x4]; + u8 log_header_modify_argument_granularity_offset[0x4]; u8 log_header_modify_argument_granularity[0x4]; u8 reserved_at_770[0x3]; u8 log_header_modify_argument_max_alloc[0x5]; @@ -2006,7 +2097,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; - u8 reserved_at_1a8[0x3]; + u8 reserved_at_1a8[0x2]; + u8 format_select_dw_8_6_ext[0x1]; u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; @@ -2022,6 +2114,16 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; + + u8 format_select_dw_gtpu_dw_0[0x8]; + u8 format_select_dw_gtpu_dw_1[0x8]; + u8 format_select_dw_gtpu_dw_2[0x8]; + u8 format_select_dw_gtpu_first_ext_dw_0[0x8]; + + u8 generate_wqe_type[0x20]; + + u8 reserved_at_2c0[0xc0]; + u8 reserved_at_380[0xb]; u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; @@ -2037,9 +2139,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_400[0x1]; u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; - u8 reserved_at_402[0x1e]; + u8 reserved_at_402[0xe]; + u8 return_reg_id[0x10]; - u8 reserved_at_420[0x20]; + u8 reserved_at_420[0x1c]; + u8 flow_table_hash_type[0x4]; u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; @@ -2086,7 +2190,7 @@ struct mlx5_ifc_extended_dest_format_bits { u8 reserved_at_60[0x20]; }; -union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { +union mlx5_ifc_dest_format_flow_counter_list_auto_bits { struct mlx5_ifc_extended_dest_format_bits extended_dest_format; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; }; @@ -2178,7 +2282,10 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; - u8 reserved_at_140[0x80]; + u8 dbr_umem_id[0x20]; + u8 wq_umem_id[0x20]; + + u8 wq_umem_offset[0x40]; u8 headers_mkey[0x20]; @@ -3562,6 +3669,8 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_wqe_based_flow_table_cap_bits wqe_based_flow_table_cap; + struct mlx5_ifc_esw_cap_bits esw_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_port_selection_cap_bits port_selection_cap; struct mlx5_ifc_qos_cap_bits qos_cap; @@ -3678,7 +3787,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_1300[0x500]; - union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; + union mlx5_ifc_dest_format_flow_counter_list_auto_bits destination[]; }; enum { @@ -3919,7 +4028,8 @@ struct mlx5_ifc_sqc_bits { u8 reg_umr[0x1]; u8 allow_swp[0x1]; u8 hairpin[0x1]; - u8 reserved_at_f[0xb]; + u8 non_wire[0x1]; + u8 reserved_at_10[0xa]; u8 ts_format[0x2]; u8 reserved_at_1c[0x4]; @@ -4961,6 +5071,16 @@ struct mlx5_ifc_set_fte_in_bits { struct mlx5_ifc_flow_context_bits flow_context; }; +struct mlx5_ifc_dest_format_bits { + u8 destination_type[0x8]; + u8 
destination_id[0x18]; + + u8 destination_eswitch_owner_vhca_id_valid[0x1]; + u8 packet_reformat[0x1]; + u8 reserved_at_22[0xe]; + u8 destination_eswitch_owner_vhca_id[0x10]; +}; + struct mlx5_ifc_rts2rts_qp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6127,7 +6247,8 @@ struct mlx5_ifc_flow_table_context_bits { u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; - u8 reserved_at_10[0x8]; + u8 rtc_valid[0x1]; + u8 reserved_at_11[0x7]; u8 log_size[0x8]; u8 reserved_at_20[0x8]; @@ -6137,11 +6258,21 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; + union { + struct { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; + } sws; + struct { + u8 rtc_id_0[0x20]; - u8 sw_owner_icm_root_1[0x40]; + u8 rtc_id_1[0x20]; - u8 sw_owner_icm_root_0[0x40]; + u8 reserved_at_100[0x40]; + } hws; + }; }; struct mlx5_ifc_query_flow_table_out_bits { @@ -8923,7 +9054,9 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x60]; + u8 wq_umem_offset[0x40]; + + u8 wq_umem_id[0x20]; u8 wq_umem_valid[0x1]; u8 reserved_at_861[0x1f]; -- cgit v1.2.3 From 00b9f0daefd7670356e592d418c890acf9179c94 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 17 Jan 2023 17:14:51 +0200 Subject: net/mlx5: Added missing definitions in preparation for HW Steering As part of preparation for HWS, added missing definitions in qp.h and fs_core.h: - FS_FT_FDB_RX/TX table types that are used by HWS in addition to an existing FS_FT_FDB - MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE that is used by HWS to require fence in WQE Reviewed-by: Hamdan Agbariya Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index ad1ce650146c..fc7eeff99a8a 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -149,6 +149,7 @@ enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, + MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5, }; enum { -- cgit v1.2.3 From 452ef7f86036392005940de54228d42ca0044192 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 5 Aug 2024 10:03:20 +0300 Subject: net/mlx5: Add missing masks and QoS bit masks for scheduling elements Add the missing masks for supported element types and Transmit Scheduling Arbiter (TSAR) types in scheduling elements. Also, add the corresponding bit masks for these types in the QoS capabilities of a NIC scheduler. 
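A hypothetical capability check built on the new fields (the helper name is illustrative, and MLX5_CAP_QOS() is assumed to be the existing accessor for qos_cap fields):

	/* sketch: only create a DWRR TSAR on the NIC scheduler if advertised */
	static bool example_nic_dwrr_tsar_supported(struct mlx5_core_dev *mdev)
	{
		return MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR;
	}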
Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..cfdf984a95a8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1027,7 +1027,8 @@ struct mlx5_ifc_qos_cap_bits { u8 max_tsar_bw_share[0x20]; - u8 reserved_at_100[0x20]; + u8 nic_element_type[0x10]; + u8 nic_tsar_type[0x10]; u8 reserved_at_120[0x3]; u8 log_meter_aso_granularity[0x5]; @@ -3966,6 +3967,7 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, + ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, }; struct mlx5_ifc_scheduling_context_bits { @@ -4675,6 +4677,12 @@ enum { TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, }; +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + struct mlx5_ifc_tsar_element_bits { u8 reserved_at_0[0x8]; u8 tsar_type[0x8]; -- cgit v1.2.3 From 080d72f471c86f8906845bc822051f5790d0a90d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:43 +0200 Subject: libeth: add Tx buffer completion helpers Software-side Tx buffers for storing DMA, frame size, skb pointers etc. are pretty much generic and every driver defines them the same way. The same can be said for software Tx completions -- same napi_consume_skb()s and all that... Add a couple simple wrappers for doing that to stop repeating the old tale at least within the Intel code. Drivers are free to use 'priv' member at the end of the structure. 
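An illustrative driver-side completion loop (the ring structure and its fields are hypothetical; only struct libeth_sqe, struct libeth_cq_pp, struct libeth_sq_napi_stats and libeth_tx_complete() come from this patch):

	static void example_clean_tx(struct example_txq *txq, bool napi)
	{
		struct libeth_sq_napi_stats ss = { };
		struct libeth_cq_pp cp = {
			.dev	= txq->dev,
			.ss	= &ss,
			.napi	= napi,
		};
		u32 i = txq->next_to_clean;

		/* let libeth unmap/free each completed SQE and fill the stats */
		while (i != txq->next_to_use) {
			libeth_tx_complete(&txq->sqes[i], &cp);
			if (++i == txq->desc_count)
				i = 0;
		}

		txq->next_to_clean = i;
	}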
Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/tx.h | 129 +++++++++++++++++++++++++++++++++++++++++++++ include/net/libeth/types.h | 25 +++++++++ 2 files changed, 154 insertions(+) create mode 100644 include/net/libeth/tx.h create mode 100644 include/net/libeth/types.h (limited to 'include') diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h new file mode 100644 index 000000000000..35614f9523f6 --- /dev/null +++ b/include/net/libeth/tx.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_TX_H +#define __LIBETH_TX_H + +#include + +#include + +/* Tx buffer completion */ + +/** + * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion + * @LIBETH_SQE_EMPTY: unused/empty, no action required + * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required + * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() + * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA + * @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats + */ +enum libeth_sqe_type { + LIBETH_SQE_EMPTY = 0U, + LIBETH_SQE_CTX, + LIBETH_SQE_SLAB, + LIBETH_SQE_FRAG, + LIBETH_SQE_SKB, +}; + +/** + * struct libeth_sqe - represents a Send Queue Element / Tx buffer + * @type: type of the buffer, see the enum above + * @rs_idx: index of the last buffer from the batch this one was sent in + * @raw: slab buffer to free via kfree() + * @skb: &sk_buff to consume + * @dma: DMA address to unmap + * @len: length of the mapped region to unmap + * @nr_frags: number of frags in the frame this buffer belongs to + * @packets: number of physical packets sent for this frame + * @bytes: number of physical bytes sent for this frame + * @priv: driver-private scratchpad + */ +struct libeth_sqe { + enum libeth_sqe_type type:32; + u32 rs_idx; + + union { + void *raw; + struct sk_buff *skb; + }; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + + u32 nr_frags; + u32 packets; + u32 bytes; + + unsigned long priv; +} __aligned_largest; + +/** + * LIBETH_SQE_CHECK_PRIV - check the driver's private SQE data + * @p: type or name of the object the driver wants to fit into &libeth_sqe + * + * Make sure the driver's private data fits into libeth_sqe::priv. To be used + * right after its declaration. + */ +#define LIBETH_SQE_CHECK_PRIV(p) \ + static_assert(sizeof(p) <= sizeof_field(struct libeth_sqe, priv)) + +/** + * struct libeth_cq_pp - completion queue poll params + * @dev: &device to perform DMA unmapping + * @ss: onstack NAPI stats to fill + * @napi: whether it's called from the NAPI context + * + * libeth uses this structure to access objects needed for performing full + * Tx complete operation without passing lots of arguments and change the + * prototypes each time a new one is added. + */ +struct libeth_cq_pp { + struct device *dev; + struct libeth_sq_napi_stats *ss; + + bool napi; +}; + +/** + * libeth_tx_complete - perform Tx completion for one SQE + * @sqe: SQE to complete + * @cp: poll params + * + * Do Tx complete for all the types of buffers, incl. freeing, unmapping, + * updating the stats etc. 
+ */ +static inline void libeth_tx_complete(struct libeth_sqe *sqe, + const struct libeth_cq_pp *cp) +{ + switch (sqe->type) { + case LIBETH_SQE_EMPTY: + return; + case LIBETH_SQE_SKB: + case LIBETH_SQE_FRAG: + case LIBETH_SQE_SLAB: + dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma), + dma_unmap_len(sqe, len), DMA_TO_DEVICE); + break; + default: + break; + } + + switch (sqe->type) { + case LIBETH_SQE_SKB: + cp->ss->packets += sqe->packets; + cp->ss->bytes += sqe->bytes; + + napi_consume_skb(sqe->skb, cp->napi); + break; + case LIBETH_SQE_SLAB: + kfree(sqe->raw); + break; + default: + break; + } + + sqe->type = LIBETH_SQE_EMPTY; +} + +#endif /* __LIBETH_TX_H */ diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h new file mode 100644 index 000000000000..603825e45133 --- /dev/null +++ b/include/net/libeth/types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_TYPES_H +#define __LIBETH_TYPES_H + +#include + +/** + * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop + * @packets: completed frames counter + * @bytes: sum of bytes of completed frames above + * @raw: alias to access all the fields as an array + */ +struct libeth_sq_napi_stats { + union { + struct { + u32 packets; + u32 bytes; + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + +#endif /* __LIBETH_TYPES_H */ -- cgit v1.2.3 From 3dc95a3edd0a86b4a59670b3fafcc64c7d83e2e7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:45 +0200 Subject: netdevice: add netdev_tx_reset_subqueue() shorthand Add a shorthand similar to other net*_subqueue() helpers for resetting the queue by its index w/o obtaining &netdev_tx_queue beforehand manually. Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/linux/netdevice.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..44d1dbb54ffe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3568,6 +3568,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) #endif } +/** + * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue + * @dev: network device + * @qid: stack index of the queue to reset + */ +static inline void netdev_tx_reset_subqueue(const struct net_device *dev, + u32 qid) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); +} + /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device @@ -3577,7 +3588,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) */ static inline void netdev_reset_queue(struct net_device *dev_queue) { - netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); + netdev_tx_reset_subqueue(dev_queue, 0); } /** -- cgit v1.2.3 From 40a895fd9a358eea16901026360bd2cd3ca691a7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:45 -0700 Subject: cxl: move cxl headers to new include/cxl/ directory Group all cxl related kernel headers into include/cxl/ directory. 
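Consumers switch include paths accordingly, e.g. (illustrative):

	-#include <linux/cxl-event.h>
	-#include <linux/einj-cxl.h>
	+#include <cxl/event.h>
	+#include <cxl/einj.h>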
Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/cxl/einj.h | 44 ++++++++++++ include/cxl/event.h | 175 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/cxl-event.h | 175 ---------------------------------------------- include/linux/einj-cxl.h | 44 ------------ 4 files changed, 219 insertions(+), 219 deletions(-) create mode 100644 include/cxl/einj.h create mode 100644 include/cxl/event.h delete mode 100644 include/linux/cxl-event.h delete mode 100644 include/linux/einj-cxl.h (limited to 'include') diff --git a/include/cxl/einj.h b/include/cxl/einj.h new file mode 100644 index 000000000000..624ff6ff41f9 --- /dev/null +++ b/include/cxl/einj.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham + */ +#ifndef EINJ_CXL_H +#define EINJ_CXL_H + +#include +#include + +struct pci_dev; +struct seq_file; + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_cxl_is_initialized(void); +#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_cxl_is_initialized(void) { return false; } +#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ + +#endif /* EINJ_CXL_H */ diff --git a/include/cxl/event.h b/include/cxl/event.h new file mode 100644 index 000000000000..0bea1afbd747 --- /dev/null +++ b/include/cxl/event.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. 
*/ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +#include +#include +#include + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_generic { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_media_hdr media_hdr; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_media_hdr media_hdr; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + +enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +struct cxl_cper_work_data { + enum cxl_event_type event_type; + struct cxl_cper_event_rec rec; +}; + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_work(struct work_struct *work); +int cxl_cper_unregister_work(struct work_struct *work); +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); +#else +static inline int cxl_cper_register_work(struct 
work_struct *work) +{ + return 0; +} + +static inline int cxl_cper_unregister_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return 0; +} +#endif + +#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h deleted file mode 100644 index 0bea1afbd747..000000000000 --- a/include/linux/cxl-event.h +++ /dev/null @@ -1,175 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2023 Intel Corporation. */ -#ifndef _LINUX_CXL_EVENT_H -#define _LINUX_CXL_EVENT_H - -#include -#include -#include - -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_hdr { - u8 length; - u8 flags[3]; - __le16 handle; - __le16 related_handle; - __le64 timestamp; - u8 maint_op_class; - u8 reserved[15]; -} __packed; - -struct cxl_event_media_hdr { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - /* - * The meaning of Validity Flags from bit 2 is - * different across DRAM and General Media records - */ - u8 validity_flags[2]; - u8 channel; - u8 rank; -} __packed; - -#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_generic { - struct cxl_event_record_hdr hdr; - u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; -} __packed; - -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 -struct cxl_event_gen_media { - struct cxl_event_media_hdr media_hdr; - u8 device[3]; - u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; -} __packed; - -/* - * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 - */ -#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 -struct cxl_event_dram { - struct cxl_event_media_hdr media_hdr; - u8 nibble_mask[3]; - u8 bank_group; - u8 bank; - u8 row[3]; - u8 column[2]; - u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; -} __packed; - -/* - * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 - */ -struct cxl_get_health_info { - u8 health_status; - u8 media_status; - u8 add_status; - u8 life_used; - u8 device_temp[2]; - u8 dirty_shutdown_cnt[4]; - u8 cor_vol_err_cnt[4]; - u8 cor_per_err_cnt[4]; -} __packed; - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -struct cxl_event_mem_module { - struct cxl_event_record_hdr hdr; - u8 event_type; - struct cxl_get_health_info info; - u8 reserved[0x3d]; -} __packed; - -union cxl_event { - struct cxl_event_generic generic; - struct cxl_event_gen_media gen_media; - struct cxl_event_dram dram; - struct cxl_event_mem_module mem_module; - /* dram & gen_media event header */ - struct cxl_event_media_hdr media_hdr; -} __packed; - -/* - * Common Event Record Format; in event logs - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_raw { - uuid_t id; - union cxl_event event; -} __packed; - -enum cxl_event_type { - CXL_CPER_EVENT_GENERIC, - CXL_CPER_EVENT_GEN_MEDIA, - CXL_CPER_EVENT_DRAM, - CXL_CPER_EVENT_MEM_MODULE, -}; - -#define CPER_CXL_DEVICE_ID_VALID BIT(0) -#define CPER_CXL_DEVICE_SN_VALID BIT(1) -#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) -struct cxl_cper_event_rec { - struct { - u32 length; - u64 validation_bits; - struct cper_cxl_event_devid { - u16 vendor_id; - u16 device_id; - u8 func_num; - u8 device_num; - u8 bus_num; - u16 segment_num; - u16 slot_num; /* bits 2:0 reserved */ - u8 reserved; - } 
__packed device_id; - struct cper_cxl_event_sn { - u32 lower_dw; - u32 upper_dw; - } __packed dev_serial_num; - } __packed hdr; - - union cxl_event event; -} __packed; - -struct cxl_cper_work_data { - enum cxl_event_type event_type; - struct cxl_cper_event_rec rec; -}; - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_work(struct work_struct *work); -int cxl_cper_unregister_work(struct work_struct *work); -int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); -#else -static inline int cxl_cper_register_work(struct work_struct *work) -{ - return 0; -} - -static inline int cxl_cper_unregister_work(struct work_struct *work) -{ - return 0; -} -static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) -{ - return 0; -} -#endif - -#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h deleted file mode 100644 index 624ff6ff41f9..000000000000 --- a/include/linux/einj-cxl.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL protocol Error INJection support. - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Author: Ben Cheatham - */ -#ifndef EINJ_CXL_H -#define EINJ_CXL_H - -#include -#include - -struct pci_dev; -struct seq_file; - -#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) -int einj_cxl_available_error_type_show(struct seq_file *m, void *v); -int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); -int einj_cxl_inject_rch_error(u64 rcrb, u64 type); -bool einj_cxl_is_initialized(void); -#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ -static inline int einj_cxl_available_error_type_show(struct seq_file *m, - void *v) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) -{ - return -ENXIO; -} - -static inline bool einj_cxl_is_initialized(void) { return false; } -#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ - -#endif /* EINJ_CXL_H */ -- cgit v1.2.3 From eb3b3f520518003cd363239fc160bdd7ed327319 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 10 Sep 2024 00:31:49 +0200 Subject: dt-bindings: clock, reset: fix top-comment indentation rk3576 headers Block comments should align the * on each line, as checkpatch rightfully pointed out, so fix that style issue on the newly added rk3576 headers. Fixes: 49c04453db81 ("dt-bindings: clock, reset: Add support for rk3576") Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240909223149.85364-1-heiko@sntech.de Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/rockchip,rk3576-cru.h | 12 ++++++------ include/dt-bindings/reset/rockchip,rk3576-cru.h | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3576-cru.h b/include/dt-bindings/clock/rockchip,rk3576-cru.h index a2933021be89..25aed298ac2c 100644 --- a/include/dt-bindings/clock/rockchip,rk3576-cru.h +++ b/include/dt-bindings/clock/rockchip,rk3576-cru.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* -* Copyright (c) 2023 Rockchip Electronics Co. Ltd. -* Copyright (c) 2024 Collabora Ltd. -* -* Author: Elaine Zhang -* Author: Detlev Casanova -*/ + * Copyright (c) 2023 Rockchip Electronics Co. Ltd. + * Copyright (c) 2024 Collabora Ltd. 
+ * + * Author: Elaine Zhang + * Author: Detlev Casanova + */ #ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H #define _DT_BINDINGS_CLK_ROCKCHIP_RK3576_H diff --git a/include/dt-bindings/reset/rockchip,rk3576-cru.h b/include/dt-bindings/reset/rockchip,rk3576-cru.h index 291fec0ecba0..ae856906f3a3 100644 --- a/include/dt-bindings/reset/rockchip,rk3576-cru.h +++ b/include/dt-bindings/reset/rockchip,rk3576-cru.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* -* Copyright (c) 2023 Rockchip Electronics Co. Ltd. -* Copyright (c) 2024 Collabora Ltd. -* -* Author: Elaine Zhang -* Author: Detlev Casanova -*/ + * Copyright (c) 2023 Rockchip Electronics Co. Ltd. + * Copyright (c) 2024 Collabora Ltd. + * + * Author: Elaine Zhang + * Author: Detlev Casanova + */ #ifndef _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H #define _DT_BINDINGS_RESET_ROCKCHIP_RK3576_H -- cgit v1.2.3 From 246d3aa3e53151fa150f10257ddd8a4facd31a6a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 8 Aug 2024 12:18:46 +0100 Subject: mm: cleanup count_mthp_stat() definition Patch series "Shmem mTHP controls and stats improvements", v3. This is a small series to tidy up the way the shmem controls and stats are exposed. These patches were previously part of the series at [2], but I decided to split them out since they can go in independently. This patch (of 2): Let's move count_mthp_stat() so that it's always defined, even when THP is disabled. Previously uses of the function in files such as shmem.c, which are compiled even when THP is disabled, required ugly THP ifdeferry. With this cleanup, we can remove those ifdefs and the function resolves to a nop when THP is disabled. I shortly plan to call count_mthp_stat() from more THP-invariant source files. Link: https://lkml.kernel.org/r/20240808111849.651867-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20240808111849.651867-2-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: Barry Song Reviewed-by: Baolin Wang Reviewed-by: Lance Yang Acked-by: David Hildenbrand Cc: Gavin Shan Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 6370026689e0..4c32058cacfe 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -114,6 +114,41 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) +enum mthp_stat_item { + MTHP_STAT_ANON_FAULT_ALLOC, + MTHP_STAT_ANON_FAULT_FALLBACK, + MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_SWPOUT, + MTHP_STAT_SWPOUT_FALLBACK, + MTHP_STAT_SHMEM_ALLOC, + MTHP_STAT_SHMEM_FALLBACK, + MTHP_STAT_SHMEM_FALLBACK_CHARGE, + MTHP_STAT_SPLIT, + MTHP_STAT_SPLIT_FAILED, + MTHP_STAT_SPLIT_DEFERRED, + __MTHP_STAT_COUNT +}; + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) +struct mthp_stat { + unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; +}; + +DECLARE_PER_CPU(struct mthp_stat, mthp_stats); + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + if (order <= 0 || order > PMD_ORDER) + return; + + this_cpu_inc(mthp_stats.stats[order][item]); +} +#else +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern unsigned long transparent_hugepage_flags; @@ 
-269,41 +304,6 @@ struct thpsize { #define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj) -enum mthp_stat_item { - MTHP_STAT_ANON_FAULT_ALLOC, - MTHP_STAT_ANON_FAULT_FALLBACK, - MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, - MTHP_STAT_SWPOUT, - MTHP_STAT_SWPOUT_FALLBACK, - MTHP_STAT_SHMEM_ALLOC, - MTHP_STAT_SHMEM_FALLBACK, - MTHP_STAT_SHMEM_FALLBACK_CHARGE, - MTHP_STAT_SPLIT, - MTHP_STAT_SPLIT_FAILED, - MTHP_STAT_SPLIT_DEFERRED, - __MTHP_STAT_COUNT -}; - -struct mthp_stat { - unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; -}; - -#ifdef CONFIG_SYSFS -DECLARE_PER_CPU(struct mthp_stat, mthp_stats); - -static inline void count_mthp_stat(int order, enum mthp_stat_item item) -{ - if (order <= 0 || order > PMD_ORDER) - return; - - this_cpu_inc(mthp_stats.stats[order][item]); -} -#else -static inline void count_mthp_stat(int order, enum mthp_stat_item item) -{ -} -#endif - #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Sat, 24 Aug 2024 13:04:40 +1200 Subject: mm: count the number of anonymous THPs per size Patch series "mm: count the number of anonymous THPs per size", v4. Knowing the number of transparent anon THPs in the system is crucial for performance analysis. It helps in understanding the ratio and distribution of THPs versus small folios throughout the system. Additionally, partial unmapping by userspace can lead to significant waste of THPs over time and increase memory reclamation pressure. We need this information for comprehensive system tuning. This patch (of 2): Let's track for each anonymous THP size, how many of them are currently allocated. We'll track the complete lifespan of an anon THP, starting when it becomes an anon THP ("large anon folio") (->mapping gets set), until it gets freed (->mapping gets cleared). Introduce a new "nr_anon" counter per THP size and adjust the corresponding counter in the following cases: * We allocate a new THP and call folio_add_new_anon_rmap() to map it the first time and turn it into an anon THP. * We split an anon THP into multiple smaller ones. * We migrate an anon THP, when we prepare the destination. * We free an anon THP back to the buddy. Note that AnonPages in /proc/meminfo currently tracks the total number of *mapped* anonymous *pages*, and therefore has slightly different semantics. In the future, we might also want to track "nr_anon_mapped" for each THP size, which might be helpful when comparing it to the number of allocated anon THPs (long-term pinning, stuck in swapcache, memory leaks, ...). Further note that for now, we only track anon THPs after they got their ->mapping set, for example via folio_add_new_anon_rmap(). If we would allocate some in the swapcache, they will only show up in the statistics for now after they have been mapped to user space the first time, where we call folio_add_new_anon_rmap(). 
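A simplified sketch of the bookkeeping described above (the call sites live in mm/ and are not part of this include/ hunk; the placement is illustrative):

	/* folio becomes an anon THP, e.g. via folio_add_new_anon_rmap() */
	mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);

	/* folio stops being one: split, or freed back to the buddy */
	mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, -1);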
[akpm@linux-foundation.org: documentation fixups, per David] Link: https://lkml.kernel.org/r/3e8add35-e26b-443b-8a04-1078f4bc78f6@redhat.com Link: https://lkml.kernel.org/r/20240824010441.21308-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240824010441.21308-2-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Chris Li Cc: Chuanhua Han Cc: Kairui Song Cc: Kalesh Singh Cc: Lance Yang Cc: Ryan Roberts Cc: Shuai Yuan Cc: Usama Arif Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4c32058cacfe..2ee2971e4e10 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -126,6 +126,7 @@ enum mthp_stat_item { MTHP_STAT_SPLIT, MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_DEFERRED, + MTHP_STAT_NR_ANON, __MTHP_STAT_COUNT }; @@ -136,14 +137,24 @@ struct mthp_stat { DECLARE_PER_CPU(struct mthp_stat, mthp_stats); -static inline void count_mthp_stat(int order, enum mthp_stat_item item) +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) { if (order <= 0 || order > PMD_ORDER) return; - this_cpu_inc(mthp_stats.stats[order][item]); + this_cpu_add(mthp_stats.stats[order][item], delta); +} + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + mod_mthp_stat(order, item, 1); } + #else +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) +{ +} + static inline void count_mthp_stat(int order, enum mthp_stat_item item) { } -- cgit v1.2.3 From 8175ebfd302abe6fbdca9037f763ecbfdb8db572 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 24 Aug 2024 13:04:41 +1200 Subject: mm: count the number of partially mapped anonymous THPs per size When a THP is added to the deferred_list due to partially mapped, its partial pages are unused, leading to wasted memory and potentially increasing memory reclamation pressure. Detailing the specifics of how unmapping occurs is quite difficult and not that useful, so we adopt a simple approach: each time a THP enters the deferred_list, we increment the count by 1; whenever it leaves for any reason, we decrement the count by 1. Link: https://lkml.kernel.org/r/20240824010441.21308-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Chris Li Cc: Chuanhua Han Cc: Kairui Song Cc: Kalesh Singh Cc: Lance Yang Cc: Ryan Roberts Cc: Shuai Yuan Cc: Usama Arif Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2ee2971e4e10..4902e2f7e896 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -127,6 +127,7 @@ enum mthp_stat_item { MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_DEFERRED, MTHP_STAT_NR_ANON, + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, __MTHP_STAT_COUNT }; -- cgit v1.2.3 From b7315fbb64736acad3cd33851884b222b00dc0f4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 25 Aug 2024 21:23:20 -0700 Subject: mm/damon/core: introduce per-context region priorities histogram buffer Patch series "replace per-quota region priorities histogram buffer with per-context one". Each DAMOS quota (struct damos_quota) maintains a histogram for total regions size per its prioritization score. 
DAMOS calculates the minimum prioritization score of regions that are ok to apply the DAMOS action to while respecting the quota. The histogram is constructed only for the calculation of the minimum score in damos_adjust_quota(), which is called by kdamond_fn() for each quota. Hence, there is no real reason to have per-quota histogram. Only per-kdamond histogram is needed, since parallel kdamonds could have races otherwise. The current implementation is only wasting memory, and can easily cause unintended stack usage[1]. So, introducing a per-kdamond histogram and replacing the per-quota one with it would be the right solution for the issue. However, supporting multiple DAMON contexts per kdamond is still an ongoing work[2] without a clear estimated time of arrival. Meanwhile, per-context histogram could be an effective and straightforward solution having no blocker. Let's fix the problem first in this way. This patch (of 4): Introduce per-context buffer for region priority scores-total size histogram. Same as the per-quota one (->histogram of struct damos_quota), the new buffer is hidden from DAMON API users by being defined as a private field of DAMON context structure. It is dynamically allocated and de-allocated at the beginning and ending of the execution of the kdamond by kdamond_fn() itself. [1] commit 0742cadf5e4c ("mm/damon/lru_sort: adjust local variable to dynamic allocation") [2] https://lore.kernel.org/20240531122320.909060-1-yorha.op@gmail.com Link: https://lkml.kernel.org/r/20240826042323.87025-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240826042323.87025-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 27c546bfc6d4..60cb8eada3d6 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -630,6 +630,8 @@ struct damon_ctx { unsigned long next_ops_update_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; + /* for scheme quotas prioritization */ + unsigned long *regions_score_histogram; /* public: */ struct task_struct *kdamond; -- cgit v1.2.3 From e3bcb1672583f2e1cf456e4303ce562a3cc775c0 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 25 Aug 2024 21:23:22 -0700 Subject: mm/damon/core: remove per-scheme region priority histogram buffer Nobody is reading from or writing to the per-scheme region priorities histogram buffer. It is only wasting memory. Remove it.
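Taken together with the previous patch, the intended lifetime of the per-context buffer is roughly (a simplified sketch of what kdamond_fn() does; error handling omitted):

	ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
			sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
	/* ... kdamond main loop; damos_adjust_quota() uses the buffer ... */
	kfree(ctx->regions_score_histogram);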
Link: https://lkml.kernel.org/r/20240826042323.87025-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 60cb8eada3d6..a67f2c4940e9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -233,7 +233,6 @@ struct damos_quota { unsigned long charge_addr_from; /* For prioritization */ - unsigned long histogram[DAMOS_MAX_SCORE + 1]; unsigned int min_score; /* For feedback loop */ -- cgit v1.2.3 From 17d75422604f0b92869aa17cb44f60958212f033 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Aug 2024 08:28:22 +1200 Subject: mm: document __GFP_NOFAIL must be blockable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-blocking allocation with __GFP_NOFAIL is not supported and may still result in NULL pointers (if we don't return NULL, we result in busy-loop within non-sleepable contexts): static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { ... /* * Make sure that __GFP_NOFAIL request doesn't leak out and make sure * we always retry */ if (gfp_mask & __GFP_NOFAIL) { /* * All existing users of the __GFP_NOFAIL are blockable, so warn * of any new users that actually require GFP_NOWAIT */ if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask)) goto fail; ... } ... fail: warn_alloc(gfp_mask, ac->nodemask, "page allocation failure: order:%u", order); got_pg: return page; } Highlight this in the documentation of __GFP_NOFAIL so that non-mm subsystems can reject any illegal usage of __GFP_NOFAIL with GFP_ATOMIC, GFP_NOWAIT, etc. Link: https://lkml.kernel.org/r/20240830202823.21478-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Michal Hocko Reviewed-by: Christoph Hellwig Acked-by: Vlastimil Babka Acked-by: Davidlohr Bueso Acked-by: David Hildenbrand Cc: Christoph Lameter Cc: David Rientjes Cc: "Eugenio Pérez" Cc: Hailong.Liu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jason Wang Cc: Joonsoo Kim Cc: Kees Cook Cc: Linus Torvalds Cc: Lorenzo Stoakes Cc: Maxime Coquelin Cc: "Michael S. Tsirkin" Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki (Sony) Cc: Xuan Zhuo Cc: Christoph Hellwig Cc: Xie Yongji Cc: Yafang Shao Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 313be4ad79fd..4a1fa7706b0c 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -215,7 +215,8 @@ enum { * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these - * implicit rules. + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus @@ -246,6 +247,8 @@ enum { * cannot handle allocation failures. The allocation could block * indefinitely but will never return with failure. Testing for * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. 
* New users should be evaluated carefully (and the flag should be * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless -- cgit v1.2.3 From 903edea6c53f097f5f0c847fdbbfab0c6c44f241 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Aug 2024 08:28:23 +1200 Subject: mm: warn about illegal __GFP_NOFAIL usage in a more appropriate location and manner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three points for this change: 1. We should consolidate all warnings in one place. Currently, the order > 1 warning is in the hotpath, while others are in less likely scenarios. Moving all warnings to the slowpath will reduce the overhead for order > 1 and increase the visibility of other warnings. 2. We currently have two warnings for order: one for order > 1 in the hotpath and another for order > costly_order in the laziest path. I suggest standardizing on order > 1 since it's been in use for a long time. 3. We don't need to check for __GFP_NOWARN in this case. __GFP_NOWARN is meant to suppress allocation failure reports, but here we're dealing with bug detection, not allocation failures. So replace WARN_ON_ONCE_GFP by WARN_ON_ONCE. [v-songbaohua@oppo.com: also update the doc for __GFP_NOFAIL with order > 1] Link: https://lkml.kernel.org/r/20240903223935.1697-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240830202823.21478-4-21cnbao@gmail.com Signed-off-by: Barry Song Suggested-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Christoph Lameter Cc: Davidlohr Bueso Cc: David Rientjes Cc: "Eugenio Pérez" Cc: Hailong.Liu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jason Wang Cc: Joonsoo Kim Cc: Kees Cook Cc: Linus Torvalds Cc: Lorenzo Stoakes Cc: Maxime Coquelin Cc: "Michael S. Tsirkin" Cc: Pekka Enberg Cc: Roman Gushchin Cc: Uladzislau Rezki (Sony) Cc: Xie Yongji Cc: Xuan Zhuo Cc: Yafang Shao Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 4a1fa7706b0c..65db9349f905 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -253,7 +253,8 @@ enum { * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless * loop around allocator. - * Using this flag for costly allocations is _highly_ discouraged. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) -- cgit v1.2.3 From b1f202060afeb7fcb98473929d26fd3d2093b067 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Aug 2024 11:03:36 +0100 Subject: mm: remap unused subpages to shared zeropage when splitting isolated thp Patch series "mm: split underused THPs", v5. The current upstream default policy for THP is always. However, Meta uses madvise in production as the current THP=always policy vastly overprovisions THPs in sparsely accessed memory areas, resulting in excessive memory pressure and premature OOM killing. Using madvise + relying on khugepaged has certain drawbacks over THP=always. Using madvise hints mean THPs aren't "transparent" and require userspace changes. 
Waiting for khugepaged to scan memory and collapse pages into THP can be slow and unpredictable in terms of performance (i.e. you don't know when the collapse will happen), while production environments require predictable performance. If there is enough memory available, it's better for both performance and predictability to have a THP from fault time, i.e. THP=always rather than wait for khugepaged to collapse it, and deal with sparsely populated THPs when the system is running out of memory.

This patch series is an attempt to mitigate the issue of running out of memory when THP is always enabled. During runtime whenever a THP is being faulted in or collapsed by khugepaged, the THP is added to a list. Whenever memory reclaim happens, the kernel runs the deferred_split shrinker which goes through the list and checks if the THP was underused, i.e. how many of the base 4K pages of the entire THP were zero-filled. If this number goes above a certain threshold, the shrinker will attempt to split that THP. Then at remap time, the pages that were zero-filled are mapped to the shared zeropage, hence saving memory. This method avoids the downside of wasting memory in areas where THP is sparsely filled when THP is always enabled, while still providing the upside of THPs like reduced TLB misses without having to use madvise.

Meta production workloads that were CPU bound (>99% CPU utilization) were tested with THP shrinker. The results after 2 hours are as follows:

                        | THP=madvise | THP=always        | THP=always
                        |             | + shrinker series | + max_ptes_none=409
-----------------------------------------------------------------------------
Performance improvement |      -      |       +1.8%       |       +1.7%
(over THP=madvise)      |             |                   |
-----------------------------------------------------------------------------
Memory usage            |    54.6G    |   58.8G (+7.7%)   |   55.9G (+2.4%)
-----------------------------------------------------------------------------

max_ptes_none=409 means that any THP that has more than 409 out of 512 (80%) zero-filled pages will be split. To test out the patches, the below commands without the shrinker will invoke OOM killer immediately and kill stress, but will not fail with the shrinker:

echo 450 > /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
mkdir /sys/fs/cgroup/test
echo $$ > /sys/fs/cgroup/test/cgroup.procs
echo 20M > /sys/fs/cgroup/test/memory.max
echo 0 > /sys/fs/cgroup/test/memory.swap.max
# allocate twice memory.max for each stress worker and touch 40/512 of
# each THP, i.e. vm-stride 50K.
# With the shrinker, max_ptes_none of 470 and below won't invoke OOM
# killer.
# Without the shrinker, OOM killer is invoked immediately irrespective
# of max_ptes_none value and kills stress.
stress --vm 1 --vm-bytes 40M --vm-stride 50K

This patch (of 5):

Here being unused means containing only zeros and inaccessible to userspace. When splitting an isolated thp under reclaim or migration, the unused subpages can be mapped to the shared zeropage, hence saving memory. This is particularly helpful when the internal fragmentation of a thp is high, i.e. it has many untouched subpages. This is also a prerequisite for the THP low utilization shrinker which will be introduced in later patches, where underutilized THPs are split, and the zero-filled pages are freed, saving memory.
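A rough sketch of the core idea at remap time (illustrative only; the helper below is hypothetical and is not the function added by this patch): each still-unmapped subpage is scanned, and only pages that actually contain data get a real PTE, while fully zero pages can be pointed at the shared zeropage instead.

#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical helper: "unused" == entirely zero-filled. */
static bool subpage_is_unused(struct page *page)
{
	void *addr = kmap_local_page(page);
	/* memchr_inv() returns NULL when every byte matches, i.e. all zero. */
	bool unused = !memchr_inv(addr, 0, PAGE_SIZE);

	kunmap_local(addr);
	return unused;
}

Subpages for which such a check succeeds can be remapped read-only to the shared zeropage during remove_migration_ptes(), which is what the RMP_USE_SHARED_ZEROPAGE flag added in the rmap.h hunk below requests.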
Link: https://lkml.kernel.org/r/20240830100438.3623486-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20240830100438.3623486-3-usamaarif642@gmail.com Signed-off-by: Yu Zhao Signed-off-by: Usama Arif Tested-by: Shuang Zhai Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Rik van Riel Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/rmap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 91b5935e8485..d5e93e44322e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -745,7 +745,12 @@ int folio_mkclean(struct folio *); int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); -void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); +enum rmp_flags { + RMP_LOCKED = 1 << 0, + RMP_USE_SHARED_ZEROPAGE = 1 << 1, +}; + +void remove_migration_ptes(struct folio *src, struct folio *dst, int flags); /* * rmap_walk_control: To control rmap traversing for specific needs -- cgit v1.2.3 From 8422acdc97ed5839692b45f800dbfb78abe65a94 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 30 Aug 2024 11:03:38 +0100 Subject: mm: introduce a pageflag for partially mapped folios Currently folio->_deferred_list is used to keep track of partially_mapped folios that are going to be split under memory pressure. In the next patch, all THPs that are faulted in and collapsed by khugepaged are also going to be tracked using _deferred_list. This patch introduces a pageflag to be able to distinguish between partially mapped folios and others in the deferred_list at split time in deferred_split_scan. It's needed as __folio_remove_rmap decrements _mapcount, _large_mapcount and _entire_mapcount, hence it won't be possible to distinguish between partially mapped folios and others in deferred_split_scan. Even though it introduces an extra flag to track if the folio is partially mapped, there is no functional change intended with this patch and the flag is not useful in this patch itself; it will become useful in the next patch when _deferred_list has non partially mapped folios.
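As a rough illustration of what the new flag enables (a sketch under the assumption that the FOLIO_TEST_FLAG()/__FOLIO_SET_FLAG() declarations in the page-flags.h hunk below generate the usual folio_test_partially_mapped()-style helpers; folio_is_underused() is a hypothetical predicate standing in for the check added later in the series):

/* Illustrative only -- not code from this patch. */
static bool deferred_entry_wants_split(struct folio *folio)
{
	/* Partially mapped folios are split to free their unmapped tail. */
	if (folio_test_partially_mapped(folio))
		return true;

	/* Fully mapped THPs are split only when they are mostly zero-filled. */
	return folio_is_underused(folio);
}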
Link: https://lkml.kernel.org/r/20240830100438.3623486-5-usamaarif642@gmail.com Signed-off-by: Usama Arif Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Rik van Riel Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Yu Zhao Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- include/linux/page-flags.h | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4902e2f7e896..e3896ebf0f94 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -333,7 +333,7 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); } -void deferred_split_folio(struct folio *folio); +void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -507,7 +507,7 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_folio(struct folio *folio) {} +static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2175ebceb41c..1b3a76710487 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -186,6 +186,7 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -859,8 +860,18 @@ static inline void ClearPageCompound(struct page *page) ClearPageHead(page); } FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) +FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +/* + * PG_partially_mapped is protected by deferred_split split_queue_lock, + * so its safe to use non-atomic set/clear. + */ +__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(large_rmappable) +FOLIO_TEST_FLAG_FALSE(partially_mapped) +__FOLIO_SET_FLAG_NOOP(partially_mapped) +__FOLIO_CLEAR_FLAG_NOOP(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) @@ -1171,7 +1182,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) -- cgit v1.2.3 From dafff3f4c850c98e15501bc6ee20581f9a013dcc Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 30 Aug 2024 11:03:39 +0100 Subject: mm: split underused THPs This is an attempt to mitigate the issue of running out of memory when THP is always enabled. During runtime whenever a THP is being faulted in (__do_huge_pmd_anonymous_page) or collapsed by khugepaged (collapse_huge_page), the THP is added to _deferred_list. Whenever memory reclaim happens in linux, the kernel runs the deferred_split shrinker which goes through the _deferred_list. 
If the folio was partially mapped, the shrinker attempts to split it. If the folio is not partially mapped, the shrinker checks if the THP was underused, i.e. how many of the base 4K pages of the entire THP were zero-filled. If this number goes above a certain threshold (decided by /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none), the shrinker will attempt to split that THP. Then at remap time, the pages that were zero-filled are mapped to the shared zeropage, hence saving memory. Link: https://lkml.kernel.org/r/20240830100438.3623486-6-usamaarif642@gmail.com Signed-off-by: Usama Arif Suggested-by: Rik van Riel Co-authored-by: Johannes Weiner Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Yu Zhao Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/khugepaged.h | 1 + include/linux/vm_event_item.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index f68865e19b0b..30baae91b225 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -4,6 +4,7 @@ #include /* MMF_VM_HUGEPAGE */ +extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index aae5c7c5cfb4..aed952d04132 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -105,6 +105,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, + THP_UNDERUSED_SPLIT_PAGE, THP_SPLIT_PMD, THP_SCAN_EXCEED_NONE_PTE, THP_SCAN_EXCEED_SWAP_PTE, -- cgit v1.2.3 From 0a6fff20d36bc81156a49649f622b152330fed3c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 16:24:59 -0400 Subject: mm: fix folio_alloc_noprof() folio_alloc_noprof) wasn't calling the _noprof version, causing allocations to be accounted here instead of to the caller Link: https://lkml.kernel.org/r/20240901202459.4867-1-kent.overstreet@linux.dev Signed-off-by: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 03ba9563c6db..a951de920e20 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -319,7 +319,7 @@ static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order } static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { - return __folio_alloc_node(gfp, order, numa_node_id()); + return __folio_alloc_node_noprof(gfp, order, numa_node_id()); } static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) -- cgit v1.2.3 From 6050df6d706f58b2240bfabece9f406170104d57 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 9 Aug 2024 02:01:15 +0000 Subject: maple_tree: fix comment typo on ma_flag of allocation tree The maple tree flag of allocation tree is MT_FLAGS_ALLOC_RANGE. Just correct it. Link: https://lkml.kernel.org/r/20240809020115.31575-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 8e1504a81cd2..c2c11004085e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -52,9 +52,9 @@ * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * - * Once the type is decided, the decision of an allocation range type or a range - * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE - * flag. + * Once the type is decided, the decision of an allocation range type or a + * range type is done by examining the immutable tree flag for the + * MT_FLAGS_ALLOC_RANGE flag. * * Node types: * 0x??1 = Root -- cgit v1.2.3 From 4fc4187984e5dbd7ad63c3acf0b228fa9bf298d3 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 2 Sep 2024 19:55:49 +0900 Subject: lib: zstd: export API needed for dictionary support Patch series "zram: introduce custom comp backends API", v7. This series introduces support for run-time compression algorithms tuning, so users, for instance, can adjust compression/acceleration levels and provide pre-trained compression/decompression dictionaries which certain algorithms support. At this point we stop supporting (old/deprecated) comp API. We may add new acomp API support in the future, but before that zram needs to undergo some major rework (we are not ready for async compression). Some benchmarks for reference (look at column #2) *** init zstd /sys/block/zram0/mm_stat 1750659072 504622188 514355200 0 514355200 1 0 34204 34204 *** init zstd dict=/home/ss/zstd-dict-amd64 /sys/block/zram0/mm_stat 1750650880 465908890 475398144 0 475398144 1 0 34185 34185 *** init zstd level=8 dict=/home/ss/zstd-dict-amd64 /sys/block/zram0/mm_stat 1750654976 430803319 439873536 0 439873536 1 0 34185 34185 *** init lz4 /sys/block/zram0/mm_stat 1750646784 664266564 677060608 0 677060608 1 0 34288 34288 *** init lz4 dict=/home/ss/lz4-dict-amd64 /sys/block/zram0/mm_stat 1750650880 619990300 632102912 0 632102912 1 0 34278 34278 *** init lz4hc /sys/block/zram0/mm_stat 1750630400 609023822 621232128 0 621232128 1 0 34288 34288 *** init lz4hc dict=/home/ss/lz4-dict-amd64 /sys/block/zram0/mm_stat 1750659072 505133172 515231744 0 515231744 1 0 34278 34278 Recompress init zram zstd (prio=0), zstd level=5 (prio 1), zstd with dict (prio 2) *** zstd /sys/block/zram0/mm_stat 1750982656 504630584 514269184 0 514269184 1 0 34204 34204 *** idle recompress priority=1 (zstd level=5) /sys/block/zram0/mm_stat 1750982656 488645601 525438976 0 514269184 1 0 34204 34204 *** idle recompress priority=2 (zstd dict) /sys/block/zram0/mm_stat 1750982656 460869640 517914624 0 514269184 1 0 34185 34204 This patch (of 24): We need to export a number of API functions that enable advanced zstd usage - C/D dictionaries, dictionaries sharing between contexts, etc. 
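To make the intended use of the calls declared in the hunk below concrete, here is a hedged sketch of a caller (not code from this series; the zstd_custom_mem field names follow upstream zstd's ZSTD_customMem and error handling is abbreviated):

#include <linux/slab.h>
#include <linux/zstd.h>

static void *zstd_kvzalloc(void *opaque, size_t size)
{
	return kvzalloc(size, GFP_KERNEL);
}

static void zstd_kvfree(void *opaque, void *address)
{
	kvfree(address);
}

static size_t compress_with_dict(const void *dict, size_t dict_len,
				 const void *src, size_t src_len,
				 void *dst, size_t dst_cap)
{
	zstd_custom_mem mem = {
		.customAlloc = zstd_kvzalloc,
		.customFree  = zstd_kvfree,
	};
	zstd_compression_parameters cparams =
		zstd_get_cparams(zstd_default_clevel(), src_len, dict_len);
	zstd_cdict *cdict = zstd_create_cdict_byreference(dict, dict_len,
							  cparams, mem);
	zstd_cctx *cctx = zstd_create_cctx_advanced(mem);
	size_t ret = 0;	/* sketch: treat 0 as "setup failed" */

	if (cdict && cctx)
		ret = zstd_compress_using_cdict(cctx, dst, dst_cap,
						src, src_len, cdict);
	if (cctx)
		zstd_free_cctx(cctx);
	if (cdict)
		zstd_free_cdict(cdict);
	return ret;	/* otherwise check with zstd_is_error() */
}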
Link: https://lkml.kernel.org/r/20240902105656.1383858-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20240902105656.1383858-2-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Nick Terrell Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/zstd.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) (limited to 'include') diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 113408eef6ec..b2c7cf310c8f 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -77,6 +77,30 @@ int zstd_min_clevel(void); */ int zstd_max_clevel(void); +/** + * zstd_default_clevel() - default compression level + * + * Return: Default compression level. + */ +int zstd_default_clevel(void); + +/** + * struct zstd_custom_mem - custom memory allocation + */ +typedef ZSTD_customMem zstd_custom_mem; + +/** + * struct zstd_dict_load_method - Dictionary load method. + * See zstd_lib.h. + */ +typedef ZSTD_dictLoadMethod_e zstd_dict_load_method; + +/** + * struct zstd_dict_content_type - Dictionary context type. + * See zstd_lib.h. + */ +typedef ZSTD_dictContentType_e zstd_dict_content_type; + /* ====== Parameter Selection ====== */ /** @@ -136,6 +160,19 @@ typedef ZSTD_parameters zstd_parameters; zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size); + +/** + * zstd_get_cparams() - returns zstd_compression_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. + * @dict_size: Dictionary size. + * + * Return: The selected zstd_compression_parameters. + */ +zstd_compression_parameters zstd_get_cparams(int level, + unsigned long long estimated_src_size, size_t dict_size); + /* ====== Single-pass Compression ====== */ typedef ZSTD_CCtx zstd_cctx; @@ -180,6 +217,71 @@ zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, const void *src, size_t src_size, const zstd_parameters *parameters); +/** + * zstd_create_cctx_advanced() - Create compression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to compression context otherwise. + */ +zstd_cctx *zstd_create_cctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_cctx() - Free compression context + * @cdict: Pointer to compression context. + * + * Return: Always 0. + */ +size_t zstd_free_cctx(zstd_cctx* cctx); + +/** + * struct zstd_cdict - Compression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_CDict zstd_cdict; + +/** + * zstd_create_cdict_byreference() - Create compression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_cdict is destroyed. + * + * Return: NULL on error, pointer to compression dictionary + * otherwise. + */ +zstd_cdict *zstd_create_cdict_byreference(const void *dict, size_t dict_size, + zstd_compression_parameters cparams, + zstd_custom_mem custom_mem); + +/** + * zstd_free_cdict() - Free compression dictionary + * @cdict: Pointer to compression dictionary. + * + * Return: Always 0. 
+ */ +size_t zstd_free_cdict(zstd_cdict* cdict); + +/** + * zstd_compress_using_cdict() - compress src into dst using a dictionary + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @cdict: The dictionary to be used. + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_using_cdict(zstd_cctx *cctx, void *dst, + size_t dst_capacity, const void *src, size_t src_size, + const zstd_cdict *cdict); + /* ====== Single-pass Decompression ====== */ typedef ZSTD_DCtx zstd_dctx; @@ -220,6 +322,71 @@ zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, const void *src, size_t src_size); +/** + * struct zstd_ddict - Decompression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_DDict zstd_ddict; + +/** + * zstd_create_ddict_byreference() - Create decompression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_ddict is destroyed. + * + * Return: NULL on error, pointer to decompression dictionary + * otherwise. + */ +zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size, + zstd_custom_mem custom_mem); +/** + * zstd_free_ddict() - Free decompression dictionary + * @dict: Pointer to the dictionary. + * + * Return: Always 0. + */ +size_t zstd_free_ddict(zstd_ddict *ddict); + +/** + * zstd_create_dctx_advanced() - Create decompression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to decompression context otherwise. + */ +zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_dctx() -- Free decompression context + * @dctx: Pointer to decompression context. + * Return: Always 0. + */ +size_t zstd_free_dctx(zstd_dctx *dctx); + +/** + * zstd_decompress_using_ddict() - decompress src into dst using a dictionary + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. + * @ddict: The dictionary to be used. + * + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). 
+ */ +size_t zstd_decompress_using_ddict(zstd_dctx *dctx, + void *dst, size_t dst_capacity, const void *src, size_t src_size, + const zstd_ddict *ddict); + + /* ====== Streaming Buffers ====== */ /** -- cgit v1.2.3 From e1e4cfd01a6e75dd4c810aeac115340805cf63ff Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 3 Sep 2024 11:19:28 -0400 Subject: mm,tmpfs: consider end of file write in shmem_is_huge Take the end of a file write into consideration when deciding whether or not to use huge pages for tmpfs files when the tmpfs filesystem is mounted with huge=within_size This allows large writes that append to the end of a file to automatically use large pages. Doing 4MB sequential writes without fallocate to a 16GB tmpfs file with fio. The numbers without THP or with huge=always stay the same, but the performance with huge=within_size now matches that of huge=always. huge before after 4kB pages 1560 MB/s 1560 MB/s within_size 1560 MB/s 4720 MB/s always: 4720 MB/s 4720 MB/s [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com Signed-off-by: Rik van Riel Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Darrick J. Wong Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1564d7d3ca61..515a9a6a3c6f 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -113,11 +113,11 @@ int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool shmem_huge_force); + loff_t write_end, bool shmem_huge_force); #else static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool shmem_huge_force) + loff_t write_end, bool shmem_huge_force) { return 0; } @@ -143,8 +143,8 @@ enum sgp_type { SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; -int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, - enum sgp_type sgp); +int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end, + struct folio **foliop, enum sgp_type sgp); struct folio *shmem_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp); -- cgit v1.2.3 From 25d4054cc97484f2555709ac233f955f674e026a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2024 17:57:59 +0100 Subject: mm: make arch_get_unmapped_area() take vm_flags by default Patch series "mm: Care about shadow stack guard gap when getting an unmapped area", v2. As covered in the commit log for c44357c2e76b ("x86/mm: care about shadow stack guard gap during placement") our current mmap() implementation does not take care to ensure that a new mapping isn't placed with existing mappings inside it's own guard gaps. This is particularly important for shadow stacks since if two shadow stacks end up getting placed adjacent to each other then they can overflow into each other which weakens the protection offered by the feature. On x86 there is a custom arch_get_unmapped_area() which was updated by the above commit to cover this case by specifying a start_gap for allocations with VM_SHADOW_STACK. 
Both arm64 and RISC-V have equivalent features and use the generic implementation of arch_get_unmapped_area() so let's make the equivalent change there so they also don't get shadow stack pages placed without guard pages. The arm64 and RISC-V shadow stack implementations are currently on the list: https://lore.kernel.org/r/20240829-arm64-gcs-v12-0-42fec94743 https://lore.kernel.org/lkml/20240403234054.2020347-1-debug@rivosinc.com/ Given the addition of the use of vm_flags in the generic implementation we also simplify the set of possibilities that have to be dealt with in the core code by making arch_get_unmapped_area() take vm_flags as standard. This is a bit invasive since the prototype change touches quite a few architectures but since the parameter is ignored the change is straightforward, the simplification for the generic code seems worth it. This patch (of 3): When we introduced arch_get_unmapped_area_vmflags() in 961148704acd ("mm: introduce arch_get_unmapped_area_vmflags()") we did so as part of properly supporting guard pages for shadow stacks on x86_64, which uses a custom arch_get_unmapped_area(). Equivalent features are also present on both arm64 and RISC-V, both of which use the generic implementation of arch_get_unmapped_area() and will require equivalent modification there. Rather than continue to deal with having two versions of the functions let's bite the bullet and have all implementations of arch_get_unmapped_area() take vm_flags as a parameter. The new parameter is currently ignored by all implementations other than x86. The only caller that doesn't have a vm_flags available is mm_get_unmapped_area(), as for the x86 implementation and the wrapper used on other architectures this is modified to supply no flags. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-0-a46b8b6dc0ed@kernel.org Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-1-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: Helge Deller [parisc] Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: "Edgecombe, Rick P" Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 91546493c43d..c4d34abc45d4 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -179,27 +179,20 @@ static inline void mm_update_next_owner(struct mm_struct *mm) extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags); +unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t); unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -unsigned long -arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t vm_flags); -unsigned long -arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t); - unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *filp, unsigned long addr, -- cgit v1.2.3 From 540e00a729dfbefe0aa0d64b6dac1d1b620a1787 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2024 17:58:00 +0100 Subject: mm: pass vm_flags to generic_get_unmapped_area() In preparation for using vm_flags to ensure guard pages for shadow stacks supply them as an argument to generic_get_unmapped_area(). The only user outside of the core code is the PowerPC book3s64 implementation which is trivially wrapping the generic implementation in the radix_enabled() case. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-2-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: Michael Ellerman Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: "Edgecombe, Rick P" Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: John Paul Adrian Glaubitz Cc: Matt Turner Cc: Max Filippov Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Vlastimil Babka Cc: WANG Xuerui Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index c4d34abc45d4..07bb8d4181d7 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -204,11 +204,11 @@ unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} -- cgit v1.2.3 From 08e28de1160a712724268fd33d77b32f1bc84d1c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 3 Sep 2024 09:36:28 +0200 Subject: uprobes: use vm_special_mapping close() functionality The following KASAN splat was shown: [ 44.505448] ================================================================== 20:37:27 [3421/145075] [ 44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8 [ 44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384 [ 44.505479] [ 44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496 [ 44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0) [ 44.505508] Call Trace: [ 44.505511] [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108 [ 44.505521] [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0 [ 44.505529] [<000b0324d2f5464c>] print_report+0x44/0x138 [ 44.505536] [<000b0324d1383192>] kasan_report+0xc2/0x140 [ 44.505543] [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8 [ 44.505550] [<000b0324d12c7978>] remove_vma+0x78/0x120 [ 44.505557] [<000b0324d128a2c6>] exit_mmap+0x326/0x750 [ 44.505563] [<000b0324d0ba655a>] __mmput+0x9a/0x370 [ 44.505570] [<000b0324d0bbfbe0>] exit_mm+0x240/0x340 [ 44.505575] [<000b0324d0bc0228>] do_exit+0x548/0xd70 [ 44.505580] [<000b0324d0bc1102>] do_group_exit+0x132/0x390 [ 44.505586] [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60 [ 44.505592] [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430 [ 44.505599] [<000b0324d2f78434>] __do_syscall+0xa4/0x170 [ 44.505606] [<000b0324d2f9454c>] system_call+0x74/0x98 [ 44.505614] [ 44.505616] Allocated by task 1384: [ 44.505621] kasan_save_stack+0x40/0x70 [ 44.505630] kasan_save_track+0x28/0x40 [ 44.505636] __kasan_kmalloc+0xa0/0xc0 [ 44.505642] __create_xol_area+0xfa/0x410 [ 44.505648] get_xol_area+0xb0/0xf0 [ 44.505652] uprobe_notify_resume+0x27a/0x470 [ 44.505657] irqentry_exit_to_user_mode+0x15e/0x1d0 [ 44.505664] pgm_check_handler+0x122/0x170 [ 44.505670] [ 44.505672] Freed by task 1384: [ 44.505676] kasan_save_stack+0x40/0x70 [ 44.505682] kasan_save_track+0x28/0x40 [ 44.505687] kasan_save_free_info+0x4a/0x70 [ 44.505693] __kasan_slab_free+0x5a/0x70 [ 44.505698] kfree+0xe8/0x3f0 [ 44.505704] __mmput+0x20/0x370 [ 44.505709] exit_mm+0x240/0x340 [ 
44.505713] do_exit+0x548/0xd70 [ 44.505718] do_group_exit+0x132/0x390 [ 44.505722] __s390x_sys_exit_group+0x56/0x60 [ 44.505727] do_syscall+0x2f6/0x430 [ 44.505732] __do_syscall+0xa4/0x170 [ 44.505738] system_call+0x74/0x98 The problem is that uprobe_clear_state() kfree's struct xol_area, which contains struct vm_special_mapping *xol_mapping. This one is passed to _install_special_mapping() in xol_add_vma(). __mput reads: static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); ... } So uprobe_clear_state() in the beginning free's the memory area containing the vm_special_mapping data, but exit_mmap() uses this address later via vma->vm_private_data (which was set in _install_special_mapping(). Fix this by moving uprobe_clear_state() to uprobes.c and use it as close() callback. [usama.anjum@collabora.com: remove unneeded condition] Link: https://lkml.kernel.org/r/20240906101825.177490-1-usama.anjum@collabora.com Link: https://lkml.kernel.org/r/20240903073629.2442754-1-svens@linux.ibm.com Fixes: 223febc6e557 ("mm: add optional close() to struct vm_special_mapping") Signed-off-by: Sven Schnelle Suggested-by: Linus Torvalds Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/uprobes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b503fafb7fb3..493dc95d912c 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -126,7 +126,6 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); -extern void uprobe_clear_state(struct mm_struct *mm); extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -- cgit v1.2.3 From 0e40cf2a8b2c847950e025d5aa594bd545118d26 Mon Sep 17 00:00:00 2001 From: Kinsey Ho Date: Thu, 5 Sep 2024 00:30:50 +0000 Subject: cgroup: clarify css sibling linkage is protected by cgroup_mutex or RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Improve mem_cgroup_iter()", v4. Incremental cgroup iteration is being used again [1]. This patchset improves the reliability of mem_cgroup_iter(). It also improves simplicity and code readability. [1] https://lore.kernel.org/20240514202641.2821494-1-hannes@cmpxchg.org/ This patch (of 5): Explicitly document that css sibling/descendant linkage is protected by cgroup_mutex or RCU. Also, document in css_next_descendant_pre() and similar functions that it isn't necessary to hold a ref on @pos. The following changes in this patchset rely on this clarification for simplification in memcg iteration code. 
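For reference, the pattern this documentation change blesses looks roughly like the following (an illustrative sketch, not code from the patch): the walk itself is covered by RCU, no reference is held on the iteration position, and a css is pinned only while it is actually used.

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

static int count_live_descendants(struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *pos;
	int n = 0;

	rcu_read_lock();
	css_for_each_descendant_pre(pos, root) {
		/* No ref is held on pos; grab one only for the access. */
		if (!css_tryget(pos))
			continue;
		n++;
		css_put(pos);
	}
	rcu_read_unlock();
	return n;
}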
Link: https://lkml.kernel.org/r/20240905003058.1859929-1-kinseyho@google.com Link: https://lkml.kernel.org/r/20240905003058.1859929-2-kinseyho@google.com Suggested-by: Yosry Ahmed Reviewed-by: Michal Koutný Signed-off-by: Kinsey Ho Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Zefan Li Cc: Hugh Dickins Cc: T.J. Mercier Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7fc2d0195f56..ca7e912b8355 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -172,7 +172,11 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* siblings list anchored at the parent's ->children */ + /* + * siblings list anchored at the parent's ->children + * + * linkage is protected by cgroup_mutex or RCU + */ struct list_head sibling; struct list_head children; -- cgit v1.2.3 From ec0db74b4b1f249ffca4df450f54c17573114045 Mon Sep 17 00:00:00 2001 From: Kinsey Ho Date: Thu, 5 Sep 2024 00:30:53 +0000 Subject: mm: restart if multiple traversals raced MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if multiple reclaimers raced on the same position, the reclaimers which detect the race will still reclaim from the same memcg. Instead, the reclaimers which detect the race should move on to the next memcg in the hierarchy. So, in the case where multiple traversals race, jump back to the start of the mem_cgroup_iter() function to find the next memcg in the hierarchy to reclaim from. Link: https://lkml.kernel.org/r/20240905003058.1859929-5-kinseyho@google.com Reported-by: syzbot+e099d407346c45275ce9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/000000000000817cf10620e20d33@google.com/ Signed-off-by: Kinsey Ho Reviewed-by: T.J. 
Mercier Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Yosry Ahmed Cc: Zefan Li Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fe05fdb92779..2ef94c74847d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -57,7 +57,7 @@ enum memcg_memory_event { struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; - unsigned int generation; + int generation; }; #ifdef CONFIG_MEMCG @@ -78,7 +78,7 @@ struct lruvec_stats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; /* scan generation, increased every round-trip */ - unsigned int generation; + atomic_t generation; }; /* -- cgit v1.2.3 From eebc0f48468e68b93393d11f75ad17385a9acca8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:06 -0600 Subject: mm/codetag: fix a typo Link: https://lkml.kernel.org/r/20240906042108.1150526-1-yuzhao@google.com Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Acked-by: Muchun Song Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 8c61ccd161ba..896491d9ebe8 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -70,7 +70,7 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) /* * When percpu variables are required to be defined as weak, static percpu * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). - * Instead we will accound all module allocations to a single counter. + * Instead we will account all module allocations to a single counter. */ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); -- cgit v1.2.3 From 95599ef684d01136a8b77c16a7c853496786e173 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:07 -0600 Subject: mm/codetag: fix pgalloc_tag_split() The current assumption is that a large folio can only be split into order-0 folios. That is not the case for hugeTLB demotion, nor for THP split: see commit c010d47f107f ("mm: thp: split huge page to any lower order pages"). When a large folio is split into ones of a lower non-zero order, only the new head pages should be tagged. Tagging tail pages can cause imbalanced "calls" counters, since only head pages are untagged by pgalloc_tag_sub() and the "calls" counts on tail pages are leaked, e.g., # echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size # echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages # time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote # echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # grep alloc_gigantic_folio /proc/allocinfo Before this patch: 0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio real 0m2.057s user 0m0.000s sys 0m2.051s After this patch: 0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio real 0m1.711s user 0m0.000s sys 0m1.704s Not tagging tail pages also improves the splitting time, e.g., by about 15% when demoting 1GB hugeTLB folios to 2MB ones, as shown above. 
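To put numbers on the hugeTLB demotion case above (assuming 4K base pages): a 1GB folio is order 18 and a 2MB folio is order 9, so one demotion produces 2^(18-9) = 512 new folios. With this fix, pgalloc_tag_split() steps through the split folio in strides of 1 << new_order and only tags the 511 new head pages (the original head keeps its reference), instead of touching all 2^18 - 1 = 262,143 tail pages as before, which is where both the leaked "calls" counts and the roughly 15% splitting-time saving come from.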
Link: https://lkml.kernel.org/r/20240906042108.1150526-2-yuzhao@google.com Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 30 ++++++++++++++++++++++++++++++ include/linux/pgalloc_tag.h | 31 ------------------------------- 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b0ff06d18c71..6bb778cbaabf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4084,4 +4084,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +#ifdef CONFIG_MEM_ALLOC_PROFILING +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i)); + + if (ref) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(ref, tag); + put_page_tag_ref(ref); + } + } +} +#else /* !CONFIG_MEM_ALLOC_PROFILING */ +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ +} +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + #endif /* _LINUX_MM_H */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 207f0c83c8e9..59a3deb792a8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) } } -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) -{ - int i; - struct page_ext *first_page_ext; - struct page_ext *page_ext; - union codetag_ref *ref; - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - first_page_ext = page_ext = page_ext_get(page); - if (unlikely(!page_ext)) - return; - - ref = codetag_ref_from_page_ext(page_ext); - if (!ref->ct) - goto out; - - tag = ct_to_alloc_tag(ref->ct); - page_ext = page_ext_next(page_ext); - for (i = 1; i < nr; i++) { - /* Set new reference to point to the original tag */ - alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); - page_ext = page_ext_next(page_ext); - } -out: - page_ext_put(first_page_ext); -} - static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; @@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} -- cgit v1.2.3 From e0a955bf7f61cb034d228736d81c1ab3a47a3dca Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 5 Sep 2024 22:21:08 -0600 Subject: mm/codetag: add pgalloc_tag_copy() Add pgalloc_tag_copy() to transfer the codetag from the old folio to the new one during migration. 
This makes original allocation sites persist cross migration rather than lump into the get_new_folio callbacks passed into migrate_pages(), e.g., compaction_alloc(): # echo 1 >/proc/sys/vm/compact_memory # grep compaction_alloc /proc/allocinfo Before this patch: 132968448 32463 mm/compaction.c:1880 func:compaction_alloc After this patch: 0 0 mm/compaction.c:1880 func:compaction_alloc Link: https://lkml.kernel.org/r/20240906042108.1150526-3-yuzhao@google.com Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging") Signed-off-by: Yu Zhao Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 24 ++++++++++-------------- include/linux/mm.h | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 896491d9ebe8..1f0a9ff23a2c 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -137,7 +137,16 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {} /* Caller should verify both ref and tag to be valid */ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) { + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return; + ref->ct = &tag->ct; +} + +static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + __alloc_tag_ref_set(ref, tag); /* * We need in increment the call counter every time we have a new * allocation or when we split a large allocation into smaller ones. @@ -147,22 +156,9 @@ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag this_cpu_inc(tag->counters->calls); } -static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) -{ - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); -} - static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) { - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); + alloc_tag_ref_set(ref, tag); this_cpu_add(tag->counters->bytes, bytes); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6bb778cbaabf..39f6faace590 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4108,10 +4108,37 @@ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new } } } + +static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ + struct alloc_tag *tag; + union codetag_ref *ref; + + tag = pgalloc_tag_get(&old->page); + if (!tag) + return; + + ref = get_page_tag_ref(&new->page); + if (!ref) + return; + + /* Clear the old ref to the original allocation tag. */ + clear_page_tag_ref(&old->page); + /* Decrement the counters of the tag on get_new_folio. */ + alloc_tag_sub(ref, folio_nr_pages(new)); + + __alloc_tag_ref_set(ref, tag); + + put_page_tag_ref(ref); +} #else /* !CONFIG_MEM_ALLOC_PROFILING */ static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) { } + +static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ +} #endif /* CONFIG_MEM_ALLOC_PROFILING */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 6462dd53a26088a433f90a5c15822196f201037c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Sep 2024 13:42:47 -1000 Subject: sched_ext: Compact struct bpf_iter_scx_dsq_kern struct scx_iter_scx_dsq is defined as 6 u64's and scx_dsq_iter_kern was using 5 of them. 
We want to add two more u64 fields but it's better if we do so while staying within scx_iter_scx_dsq to maintain binary compatibility. The way scx_iter_scx_dsq_kern is laid out is rather inefficient - the node field takes up three u64's but only one bit of the last u64 is used. Turn the bool into u32 flags and only use the lower 16 bits freeing up 48 bits - 16 bits for flags, 32 bits for a u32 - for use by struct bpf_iter_scx_dsq_kern. This allows moving the dsq_seq and flags fields of bpf_iter_scx_dsq_kern into the cursor field reducing the struct size by a full u64. No behavior changes intended. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index dc646cca4fcf..1ddbde64a31b 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -119,9 +119,17 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +enum scx_dsq_lnode_flags { + SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0, + + /* high 16 bits can be for iter cursor flags */ + __SCX_DSQ_LNODE_PRIV_SHIFT = 16, +}; + struct scx_dsq_list_node { struct list_head node; - bool is_bpf_iter_cursor; + u32 flags; + u32 priv; /* can be used by iter cursor */ }; /* -- cgit v1.2.3 From 17245a195df4c86b1fd38718d8cdc532c040c08e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Sep 2024 09:10:59 -0700 Subject: net: remove dev_pick_tx_cpu_id() dev_pick_tx_cpu_id() has been introduced with two users by commit a4ea8a3dacc3 ("net: Add generic ndo_select_queue functions"). The use in AF_PACKET has been removed in 2019 by commit b71b5837f871 ("packet: rework packet_pick_tx_queue() to use common code selection") The other user was a Netlogic XLP driver, removed in 2021 by commit 47ac6f567c28 ("staging: Remove Netlogic XLP network driver"). It's relatively unlikely that any modern driver will need an .ndo_select_queue implementation which picks purely based on CPU ID and skips XPS, delete dev_pick_tx_cpu_id() Found by code inspection. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240906161059.715546-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..2e40a137dc12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3085,8 +3085,6 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); -- cgit v1.2.3 From 038eb433dc1474c4bc7d33188294e3d4778efdfd Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 6 Sep 2024 17:54:34 -0400 Subject: dma-mapping: add tracing for dma-mapping API calls When debugging drivers, it can often be useful to trace when memory gets (un)mapped for DMA (and can be accessed by the device). Add some tracepoints for this purpose. Use u64 instead of phys_addr_t and dma_addr_t (and similarly %llx instead of %pa) because libtraceevent can't handle typedefs in all cases. 
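One way to consume the new events from userspace is via tracefs; a small hedged sketch (assuming tracefs is mounted at /sys/kernel/tracing, and that the event group is named after TRACE_SYSTEM, i.e. "dma"):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Enable every event in the "dma" group added by this patch. */
	fd = open("/sys/kernel/tracing/events/dma/enable", O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enable dma trace events");
		return 1;
	}
	close(fd);

	/* Stream dma_map_page/dma_unmap_page/... records as they occur. */
	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}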
Signed-off-by: Sean Anderson Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 341 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 include/trace/events/dma.h (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h new file mode 100644 index 000000000000..f57f05331d73 --- /dev/null +++ b/include/trace/events/dma.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma + +#if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DMA_H + +#include +#include +#include +#include + +TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); +TRACE_DEFINE_ENUM(DMA_TO_DEVICE); +TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); +TRACE_DEFINE_ENUM(DMA_NONE); + +#define decode_dma_data_direction(dir) \ + __print_symbolic(dir, \ + { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ + { DMA_TO_DEVICE, "TO_DEVICE" }, \ + { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ + { DMA_NONE, "NONE" }) + +#define decode_dma_attrs(attrs) \ + __print_flags(attrs, "|", \ + { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ + { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ + { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ + { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ + { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ + { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ + { DMA_ATTR_NO_WARN, "NO_WARN" }, \ + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) + +DECLARE_EVENT_CLASS(dma_map, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = phys_addr; + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_map, dma_map_page, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DEFINE_EVENT(dma_map, dma_map_resource, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DECLARE_EVENT_CLASS(dma_unmap, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->addr = addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->addr, + __entry->size, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_unmap, 
dma_unmap_page, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +DEFINE_EVENT(dma_unmap, dma_unmap_resource, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +TRACE_EVENT(dma_alloc, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_free, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_map_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + int ents, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, ents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __dynamic_array(u64, dma_addrs, ents) + __dynamic_array(unsigned int, lengths, ents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = + sg_phys(sg + i); + for (i = 0; i < ents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int)), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_unmap_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) 
+ __dynamic_array(u64, addrs, nents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(addrs))[i] = + sg_phys(sg + i); + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(addrs), + __get_dynamic_array_len(addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_sync_single, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + __assign_str(device); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size) +); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DECLARE_EVENT_CLASS(dma_sync_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, dma_addrs, nents) + __dynamic_array(unsigned int, lengths, nents) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int))) +); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +#endif /* _TRACE_DMA_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 6cf1c97dad2ebc4de03105cc444b3dfaa83f3dc2 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:07 +0800 Subject: virtio_balloon: introduce oom-kill invocations When the guest OS runs under critical memory pressure, the guest starts to kill processes. A guest monitor agent may scan 'oom_kill' from /proc/vmstat, and reports the OOM KILL event. However, the agent may be killed and we will loss this critical event(and the later events). 
For now we could also grep for magic words in the guest kernel log from the host side, but rather than relying on that unstable approach, have the virtio balloon device report OOM-kill invocations instead. Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-3-pizhenwei@bytedance.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_balloon.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index ddaa45e723c4..b17bbe033697 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -71,7 +71,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ -#define VIRTIO_BALLOON_S_NR 10 +#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ +#define VIRTIO_BALLOON_S_NR 11 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -83,7 +84,8 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \ VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ - VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From c5b70a26aac39f09a23fd72f44cfbb3d4d5a14d5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:08 +0800 Subject: virtio_balloon: introduce memory allocation stall counter The memory allocation stall counter represents the performance/latency of memory allocation; expose this counter to the host side through the virtio balloon device via an out-of-band channel. Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-4-pizhenwei@bytedance.com> Signed-off-by: Michael S.
Tsirkin --- include/uapi/linux/virtio_balloon.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index b17bbe033697..487b893a160e 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -72,7 +72,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ #define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ -#define VIRTIO_BALLOON_S_NR 11 +#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */ +#define VIRTIO_BALLOON_S_NR 12 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -85,7 +86,8 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ - VIRTIO_BALLOON_S_NAMES_prefix "oom-kills" \ + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \ + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From 74c025c5d7e4ac7c7ad269c1ee64da4bdfe4770c Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 23 Apr 2024 11:41:09 +0800 Subject: virtio_balloon: introduce memory scan/reclaim info Expose memory scan/reclaim information to the host side via the virtio balloon device. Now we have a metric to analyze the memory performance:

y: counter increases
n: counter does not change
h: the rate of counter change is high
l: the rate of counter change is low

OOM: VIRTIO_BALLOON_S_OOM_KILL
STALL: VIRTIO_BALLOON_S_ALLOC_STALL
ASCAN: VIRTIO_BALLOON_S_SCAN_ASYNC
DSCAN: VIRTIO_BALLOON_S_SCAN_DIRECT
ARCLM: VIRTIO_BALLOON_S_RECLAIM_ASYNC
DRCLM: VIRTIO_BALLOON_S_RECLAIM_DIRECT

- OOM[y], STALL[*], ASCAN[*], DSCAN[*], ARCLM[*], DRCLM[*]: the guest runs under really critical memory pressure.

- OOM[n], STALL[h], ASCAN[*], DSCAN[l], ARCLM[*], DRCLM[l]: the memory allocation stalls due to a cgroup limit, not global memory pressure.

- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[h]: the memory allocation stalls due to global memory pressure. Performance gets hurt a lot. A high DRCLM/DSCAN ratio shows quite effective memory reclaim.

- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[l]: the memory allocation stalls due to global memory pressure, the DRCLM/DSCAN ratio is low, and the guest OS is thrashing heavily; serious cases lead to poor performance and difficult troubleshooting. For example, sshd may block on memory allocation when accepting new connections, and a user can't log in to the VM over ssh.

- OOM[n], STALL[n], ASCAN[h], DSCAN[n], ARCLM[l], DRCLM[n]: the low ARCLM/ASCAN ratio shows that the guest tries to reclaim more memory but can't. Once more memory is required in the future, it will struggle to reclaim it.

Acked-by: David Hildenbrand Signed-off-by: zhenwei pi Message-Id: <20240423034109.1552866-5-pizhenwei@bytedance.com> Signed-off-by: Michael S.
Tsirkin --- include/uapi/linux/virtio_balloon.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 487b893a160e..ee35a372805d 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -73,7 +73,11 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ #define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ #define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */ -#define VIRTIO_BALLOON_S_NR 12 +#define VIRTIO_BALLOON_S_ASYNC_SCAN 12 /* Amount of memory scanned asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_SCAN 13 /* Amount of memory scanned directly */ +#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14 /* Amount of memory reclaimed asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15 /* Amount of memory reclaimed directly */ +#define VIRTIO_BALLOON_S_NR 16 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -87,7 +91,11 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \ - VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls" \ + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") -- cgit v1.2.3 From 2f87e9cf0c9e21ab9be1fb2ba8520a1525359497 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 31 Jul 2024 11:16:01 +0800 Subject: vdpa: support set mac address from vdpa tool Add new UAPI to support the mac address from vdpa tool Function vdpa_nl_cmd_dev_attr_set_doit() will get the new MAC address from the vdpa tool and then set it to the device. The usage is: vdpa dev set name vdpa_name mac **:**:**:**:**:** Here is example: root@L1# vdpa -jp dev config show vdpa0 { "config": { "vdpa0": { "mac": "82:4d:e9:5d:d7:e6", "link ": "up", "link_announce ": false, "mtu": 1500 } } } root@L1# vdpa dev set name vdpa0 mac 00:11:22:33:44:55 root@L1# vdpa -jp dev config show vdpa0 { "config": { "vdpa0": { "mac": "00:11:22:33:44:55", "link ": "up", "link_announce ": false, "mtu": 1500 } } } Signed-off-by: Cindy Lu Message-Id: <20240731031653.1047692-2-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/vdpa.h | 9 +++++++++ include/uapi/linux/vdpa.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7977ca03ac7a..2e7a30fe6b92 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status); * @dev: vdpa device to remove * Driver need to remove the specified device by calling * _vdpa_unregister_device(). + * @dev_set_attr: change a vdpa device's attr after it was create + * @mdev: parent device to use for device + * @dev: vdpa device structure + * @config:Attributes to be set for the device. + * The driver needs to check the mask of the structure and then set + * the related information to the vdpa device. The driver must return 0 + * if set successfully. 
*/ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); + int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config); }; /** diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 842bf1201ac4..71edf2c70cc3 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -19,6 +19,7 @@ enum vdpa_command { VDPA_CMD_DEV_GET, /* can dump */ VDPA_CMD_DEV_CONFIG_GET, /* can dump */ VDPA_CMD_DEV_VSTATS_GET, + VDPA_CMD_DEV_ATTR_SET, }; enum vdpa_attr { -- cgit v1.2.3 From 11596dc3dfae572cc267dda2dc4dab9ae34668f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:05 +0300 Subject: iomap: pass flags to iomap_file_buffered_write_punch_delalloc To fix short write error handling, We'll need to figure out what operation iomap_file_buffered_write_punch_delalloc is called for. Pass the flags argument on to it, and reorder the argument list to match that of ->iomap_end so that the compiler only has to add the new punch argument to the end of it instead of reshuffling the registers. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-4-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f792b37f7627..52626906ff35 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -258,10 +258,6 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, void *private); -int iomap_file_buffered_write_punch_delalloc(struct inode *inode, - struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, - int (*punch)(struct inode *inode, loff_t pos, loff_t length)); - int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); @@ -277,6 +273,12 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); + +typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, + loff_t length, ssize_t written, unsigned flag, + struct iomap *iomap, iomap_punch_t punch); + int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, const struct iomap_ops *ops); loff_t iomap_seek_hole(struct inode *inode, loff_t offset, -- cgit v1.2.3 From 492f53758fad4fde3e9a98696780f8b53f87cdae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:06 +0300 Subject: iomap: pass the iomap to the punch callback XFS will need to look at the flags in the iomap structure, so pass it down all the way to the callback. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-5-hch@lst.de Reviewed-by: Darrick J. 
Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 52626906ff35..ec2eab94f00a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -274,7 +274,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); -typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length); +typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, + struct iomap *iomap); int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flag, struct iomap *iomap, iomap_punch_t punch); -- cgit v1.2.3 From 4bceb9ba05aca23652567fb5f899cc7fcb12c8d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Sep 2024 07:39:07 +0300 Subject: iomap: remove the iomap_file_buffered_write_punch_delalloc return value iomap_file_buffered_write_punch_delalloc can only return errors if either the ->punch callback returned an error, or if someone changed the API of mapping_seek_hole_data to return a negative error code that is not -ENXIO. As the only instance of ->punch never returns an error, and such an error would be fatal anyway, remove the error propagation entirely and don't return an error code from iomap_file_buffered_write_punch_delalloc. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240910043949.3481298-6-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index ec2eab94f00a..4ad12a3c8bae 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -274,9 +274,9 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); -typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, +typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); -int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, +void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flag, struct iomap *iomap, iomap_punch_t punch); -- cgit v1.2.3 From 9028cdeb38e1f37d63cb3154799dd259b67e879e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 5 Sep 2024 10:34:22 -0700 Subject: memcg: add charging of already allocated slab objects At the moment, slab objects are charged to the memcg at allocation time. However, there are cases where slab objects are allocated at a time when the right target memcg to charge them to is not known. One such case is network sockets for incoming connections, which are allocated in softirq context. A couple hundred thousand connections are very normal on a large, loaded server, and almost all of the sockets underlying those connections are allocated in softirq context and thus not charged to any memcg. However, later at accept() time we do know the right target memcg. Let's add a new API to charge already allocated objects so we can have better accounting of the memory usage. To measure the performance impact of this change, tcp_crr is used from the neper [1] performance suite.
Basically it is a network ping pong test with new connection for each ping pong. The server and the client are run inside 3 level of cgroup hierarchy using the following commands: Server: $ tcp_crr -6 Client: $ tcp_crr -6 -c -H ${server_ip} If the client and server run on different machines with 50 GBPS NIC, there is no visible impact of the change. For the same machine experiment with v6.11-rc5 as base. base (throughput) with-patch tcp_crr 14545 (+- 80) 14463 (+- 56) It seems like the performance impact is within the noise. Link: https://github.com/google/neper [1] Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Reviewed-by: Yosry Ahmed Acked-by: Paolo Abeni # net Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..3be2a5ed4936 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -547,6 +547,35 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; #define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) +/** + * kmem_cache_charge - memcg charge an already allocated slab memory + * @objp: address of the slab object to memcg charge + * @gfpflags: describe the allocation context + * + * kmem_cache_charge allows charging a slab object to the current memcg, + * primarily in cases where charging at allocation time might not be possible + * because the target memcg is not known (i.e. softirq context) + * + * The objp should be pointer returned by the slab allocator functions like + * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge + * behavior can be controlled through gfpflags parameter, which affects how the + * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes + * that overcharging is requested instead of failure, but is not applied for the + * internal metadata allocation. + * + * There are several cases where it will return true even if the charging was + * not done: + * More specifically: + * + * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems. + * 2. Already charged slab objects. + * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc() + * without __GFP_ACCOUNT + * 4. Allocating internal metadata has failed + * + * Return: true if charge was successful otherwise false. + */ +bool kmem_cache_charge(void *objp, gfp_t gfpflags); void kmem_cache_free(struct kmem_cache *s, void *objp); kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, -- cgit v1.2.3 From 879fb3c274c12cb0892718e1e54f85fc406e3c7b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:45 +0200 Subject: slab: add struct kmem_cache_args Currently we have multiple kmem_cache_create*() variants that take up to seven separate parameters with one of the functions having to grow an eigth parameter in the future to handle both usercopy and a custom freelist pointer. Add a struct kmem_cache_args structure and move less common parameters into it. Core parameters such as name, object size, and flags continue to be passed separately. Add a new function __kmem_cache_create_args() that takes a struct kmem_cache_args pointer and port do_kmem_cache_create_usercopy() over to it. In follow-up patches we will port the other kmem_cache_create*() variants over to it as well. 
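For a sense of the intended calling convention, a hypothetical caller (struct foo, foo_ctor and foo_cache_init() are made-up names, not taken from the patch) might look like:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    struct foo {
            int id;
            char name[16];
    };

    /* Constructor run when the cache allocates new pages. */
    static void foo_ctor(void *obj)
    {
            memset(obj, 0, sizeof(struct foo));
    }

    static struct kmem_cache *foo_cachep;

    static int __init foo_cache_init(void)
    {
            struct kmem_cache_args args = {
                    .align = __alignof__(struct foo),
                    .ctor  = foo_ctor,
            };

            foo_cachep = __kmem_cache_create_args("foo_cache", sizeof(struct foo),
                                                  &args, SLAB_HWCACHE_ALIGN);
            return foo_cachep ? 0 : -ENOMEM;
    }

Because the less common parameters live in the struct, adding a new member later does not require touching callers that don't use it.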
Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 5b2da2cf31a8..2b8eeca7fd2c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -240,6 +240,28 @@ struct mem_cgroup; */ bool slab_is_available(void); +/** + * struct kmem_cache_args - Less common arguments for kmem_cache_create() + * @align: The required alignment for the objects. + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size + * @freeptr_offset: Custom offset for the free pointer in RCU caches + * @use_freeptr_offset: Whether a @freeptr_offset is used + * @ctor: A constructor for the objects. + */ +struct kmem_cache_args { + unsigned int align; + unsigned int useroffset; + unsigned int usersize; + unsigned int freeptr_offset; + bool use_freeptr_offset; + void (*ctor)(void *); +}; + +struct kmem_cache *__kmem_cache_create_args(const char *name, + unsigned int object_size, + struct kmem_cache_args *args, + slab_flags_t flags); struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); -- cgit v1.2.3 From 052d67b46bcd91b6785e8e6e047241814f6142a3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:53 +0200 Subject: slab: port KMEM_CACHE() to struct kmem_cache_args Make KMEM_CACHE() use struct kmem_cache_args. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 2b8eeca7fd2c..1f38d94387cc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -284,9 +284,11 @@ int kmem_cache_shrink(struct kmem_cache *s); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) \ - kmem_cache_create(#__struct, sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + }, (__flags)) /* * To whitelist a single field for copying to/from usercopy, use this -- cgit v1.2.3 From 199cd13a745eb44fb4828bca373155293cdcfa5c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:54 +0200 Subject: slab: port KMEM_CACHE_USERCOPY() to struct kmem_cache_args Make KMEM_CACHE_USERCOPY() use struct kmem_cache_args. 
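For reference, users of the macro are unchanged by this conversion; a hypothetical cache that whitelists a single field for usercopy still reads:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct sample {
            u32 flags;
            char ubuf[64];  /* the only region copied to/from user space */
    };

    static struct kmem_cache *sample_cachep;

    static int __init sample_cache_init(void)
    {
            /* Expands to __kmem_cache_create_args() with useroffset/usersize set. */
            sample_cachep = KMEM_CACHE_USERCOPY(sample, SLAB_ACCOUNT, ubuf);
            return sample_cachep ? 0 : -ENOMEM;
    }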
Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f38d94387cc..7e15a3a3edb1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -294,12 +294,13 @@ int kmem_cache_shrink(struct kmem_cache *s); * To whitelist a single field for copying to/from usercopy, use this * macro instead for KMEM_CACHE() above. */ -#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ - kmem_cache_create_usercopy(#__struct, \ - sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), \ - offsetof(struct __struct, __field), \ - sizeof_field(struct __struct, __field), NULL) +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + .useroffset = offsetof(struct __struct, __field), \ + .usersize = sizeof_field(struct __struct, __field), \ + }, (__flags)) /* * Common kmalloc functions provided by all allocators -- cgit v1.2.3 From b2e7456b5c25c41eda7a8a15f7ccaa4e7579949f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:55 +0200 Subject: slab: create kmem_cache_create() compatibility layer Use _Generic() to create a compatibility layer that type switches on the third argument to either call __kmem_cache_create() or __kmem_cache_create_args(). If NULL is passed for the struct kmem_cache_args argument use default args making porting for callers that don't care about additional arguments easy. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7e15a3a3edb1..15eb0a0defd6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -262,9 +262,10 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, unsigned int object_size, struct kmem_cache_args *args, slab_flags_t flags); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, slab_flags_t flags, - void (*ctor)(void *)); + +struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, + void (*ctor)(void *)); struct kmem_cache *kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, @@ -273,6 +274,28 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, unsigned int freeptr_offset, slab_flags_t flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + slab_flags_t flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + /* Make sure we don't get passed garbage. 
*/ + if (WARN_ON_ONCE(args)) + return ERR_PTR(-EINVAL); + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) + void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); -- cgit v1.2.3 From 3d453e60f1a9c377d670d5fe306d3b9eed477bc0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:57 +0200 Subject: slab: remove kmem_cache_create_rcu() Now that we have ported all users of kmem_cache_create_rcu() to struct kmem_cache_args the function is unused and can be removed. Reviewed-by: Kees Cook Reviewed-by: Jens Axboe Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 15eb0a0defd6..cb82a6414a28 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -271,9 +271,6 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); -struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size, - unsigned int freeptr_offset, - slab_flags_t flags); /* If NULL is passed for @args, use this variant with default arguments. */ static inline struct kmem_cache * -- cgit v1.2.3 From 0c9050b09cfb1ddb3fe4b2d401dca1fb674c825a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:58 +0200 Subject: slab: make kmem_cache_create_usercopy() static inline Make kmem_cache_create_usercopy() a static inline function. Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 49 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index cb82a6414a28..634717c9f73b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -266,11 +266,50 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); -struct kmem_cache *kmem_cache_create_usercopy(const char *name, - unsigned int size, unsigned int align, - slab_flags_t flags, - unsigned int useroffset, unsigned int usersize, - void (*ctor)(void *)); + +/** + * kmem_cache_create_usercopy - Create a cache with a region suitable + * for copying to userspace + * @name: A string which is used in /proc/slabinfo to identify this cache. + * @size: The size of objects to be created in this cache. + * @align: The required alignment for the objects. + * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size + * @ctor: A constructor for the objects. + * + * Cannot be called within a interrupt, but can be interrupted. + * The @ctor is run when new pages are allocated by the cache. + * + * The flags are + * + * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) + * to catch references to uninitialised memory. 
+ * + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check + * for buffer overruns. + * + * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware + * cacheline. This can be beneficial if you're counting cycles as closely + * as davem. + * + * Return: a pointer to the cache on success, NULL on failure. + */ +static inline struct kmem_cache * +kmem_cache_create_usercopy(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + .useroffset = useroffset, + .usersize = usersize, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} /* If NULL is passed for @args, use this variant with default arguments. */ static inline struct kmem_cache * -- cgit v1.2.3 From 781aee755638dbb6dbfe022bdd2f732621ec9534 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Sep 2024 09:56:59 +0200 Subject: slab: make __kmem_cache_create() static inline Make __kmem_cache_create() a static inline function. Signed-off-by: Christian Brauner Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 634717c9f73b..331412a9f4f2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -262,10 +262,17 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, unsigned int object_size, struct kmem_cache_args *args, slab_flags_t flags); +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + slab_flags_t flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; -struct kmem_cache *__kmem_cache_create(const char *name, unsigned int size, - unsigned int align, slab_flags_t flags, - void (*ctor)(void *)); + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} /** * kmem_cache_create_usercopy - Create a cache with a region suitable -- cgit v1.2.3 From 4ba4f1afb6a9fed8ef896c2363076e36572f71da Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:37 -0700 Subject: perf: Generic hotplug support for a PMU with a scope The perf subsystem assumes that the counters of a PMU are per-CPU. So the user space tool reads a counter from each CPU in the system wide mode. However, many PMUs don't have a per-CPU counter. The counter is effective for a scope, e.g., a die or a socket. To address this, a cpumask is exposed by the kernel driver to restrict to one CPU to stand for a specific scope. In case the given CPU is removed, the hotplug support has to be implemented for each such driver. The codes to support the cpumask and hotplug are very similar. - Expose a cpumask into sysfs - Pickup another CPU in the same scope if the given CPU is removed. - Invoke the perf_pmu_migrate_context() to migrate to a new CPU. - In event init, always set the CPU in the cpumask to event->cpu Similar duplicated codes are implemented for each such PMU driver. It would be good to introduce a generic infrastructure to avoid such duplication. 5 popular scopes are implemented here, core, die, cluster, pkg, and the system-wide. The scope can be set when a PMU is registered. If so, a "cpumask" is automatically exposed for the PMU. 
The "cpumask" is from the perf_online__mask, which is to track the active CPU for each scope. They are set when the first CPU of the scope is online via the generic perf hotplug support. When a corresponding CPU is removed, the perf_online__mask is updated accordingly and the PMU will be moved to a new CPU from the same scope if possible. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 701549967c18..a3cbcd727366 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -295,6 +295,19 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +/** + * pmu::scope + */ +enum perf_pmu_scope { + PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_CORE, + PERF_PMU_SCOPE_DIE, + PERF_PMU_SCOPE_CLUSTER, + PERF_PMU_SCOPE_PKG, + PERF_PMU_SCOPE_SYS_WIDE, + PERF_PMU_MAX_SCOPE, +}; + struct perf_output_handle; #define PMU_NULL_DEV ((void *)(~0UL)) @@ -318,6 +331,11 @@ struct pmu { */ int capabilities; + /* + * PMU scope + */ + unsigned int scope; + int __percpu *pmu_disable_count; struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ -- cgit v1.2.3 From a48a36b316ae5d3ab83f9b545dba15998e96d59c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:38 -0700 Subject: perf: Add PERF_EV_CAP_READ_SCOPE Usually, an event can be read from any CPU of the scope. It doesn't need to be read from the advertised CPU. Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with scope can be read from any active CPU in the scope. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-3-kan.liang@linux.intel.com --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3cbcd727366..794f66057878 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -636,10 +636,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and * cannot be a group leader. If an event with this flag is detached from the * group it is scheduled out and moved into an unrecoverable ERROR state. + * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the + * PMU scope where it is active. */ #define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define PERF_EV_CAP_SIBLING BIT(2) +#define PERF_EV_CAP_READ_SCOPE BIT(3) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) -- cgit v1.2.3 From 08155c7f2a2cc6ff99886ea5743da274be24ebe4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Aug 2024 08:16:39 -0700 Subject: perf/x86/intel/cstate: Clean up cpumask and hotplug There are three cstate PMUs with different scopes, core, die and module. The scopes are supported by the generic perf_event subsystem now. Set the scope for each PMU and remove all the cpumask and hotplug codes. 
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240802151643.1691631-4-kan.liang@linux.intel.com --- include/linux/cpuhotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9316c39260e0..2101ae2ecfca 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,7 +152,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, @@ -209,7 +208,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 740c1c84bfa3d8c63bd3b01fb570e7452f51fbd8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Sep 2024 10:26:17 +0800 Subject: spi: remove spi_controller_is_slave() and spi_slave_abort() spi_controller_is_slave() and spi_slave_abort() have both been replaced, so they can be removed. No functional change. Signed-off-by: Yang Yingliang Link: https://patch.msgid.link/20240910022618.1397-8-yangyingliang@huaweicloud.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d47d5f14ff99..4b95663163e0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -498,7 +498,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). - * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number.
Any individual value may be NULL for CS lines that @@ -725,10 +724,7 @@ struct spi_controller { struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - union { - int (*slave_abort)(struct spi_controller *ctlr); - int (*target_abort)(struct spi_controller *ctlr); - }; + int (*target_abort)(struct spi_controller *ctlr); /* * These hooks are for drivers that use a generic implementation @@ -802,11 +798,6 @@ static inline void spi_controller_put(struct spi_controller *ctlr) put_device(&ctlr->dev); } -static inline bool spi_controller_is_slave(struct spi_controller *ctlr) -{ - return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; -} - static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; @@ -1296,7 +1287,6 @@ extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -extern int spi_slave_abort(struct spi_device *spi); extern int spi_target_abort(struct spi_device *spi); static inline size_t -- cgit v1.2.3 From d6de45e3c6f3713d3825d3e2860c11d24e0f941f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 10 Sep 2024 00:35:03 +0200 Subject: platform/x86: asus-wmi: Disable OOBE experience on Zenbook S 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OOBE experience fades the keyboard backlight in & out continuously, and make the backlight uncontrollable using its device. Workaround taken from https://wiki.archlinux.org/index.php?title=ASUS_Zenbook_UM5606&diff=next&oldid=815547 Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Luke D. Jones Link: https://lore.kernel.org/r/20240909223503.1445779-1-bas@basnieuwenhuizen.nl Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0aeeae1c1943..ae9bf7479e7b 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -62,6 +62,7 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +#define ASUS_WMI_DEVID_OOBE 0x0005002F /* This can only be used to disable the screen, not re-enable */ #define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 /* Writing a brightness re-enables the screen if disabled */ -- cgit v1.2.3 From 9e97e8b277a2235bbb562a4feb6f1216fb52d1b1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jul 2024 17:12:15 -0400 Subject: btrfs: update the writepage tracepoint to take a folio Willy is wanting to get rid of page->index, convert the writepage tracepoint to take a folio so we can do folio->index instead of page->index. 
Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0a523023bdcc..0eddbb8b6728 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -674,10 +674,10 @@ TRACE_EVENT(btrfs_finish_ordered_extent, DECLARE_EVENT_CLASS(btrfs__writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc), + TP_ARGS(folio, inode, wbc), TP_STRUCT__entry_btrfs( __field( u64, ino ) @@ -695,7 +695,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->index = page->index; + __entry->index = folio->index; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; @@ -723,10 +723,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage, DEFINE_EVENT(btrfs__writepage, __extent_writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc) + TP_ARGS(folio, inode, wbc) ); TRACE_EVENT(btrfs_writepage_end_io_hook, -- cgit v1.2.3 From 06de42c5a98a28060b314589241cabcacc3c4ff8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 27 Aug 2024 03:30:16 +0200 Subject: btrfs: rename __extent_writepage() and drop double underscores The function does not follow the pattern where the underscores would be justified, so rename it. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0eddbb8b6728..e4add61e00f1 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -721,7 +721,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->writeback_index) ); -DEFINE_EVENT(btrfs__writepage, __extent_writepage, +DEFINE_EVENT(btrfs__writepage, extent_writepage, TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), -- cgit v1.2.3 From ca283ea9920ac20ae23ed398b693db3121045019 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 26 Jun 2024 23:39:11 +0200 Subject: btrfs: constify more pointer parameters Continue adding const to parameters. This is for clarity and minor addition to safety. There are some minor effects, in the assembly code and .ko measured on release config. 
Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e4add61e00f1..bf60ad50011e 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1825,7 +1825,7 @@ TRACE_EVENT(qgroup_update_counters, TRACE_EVENT(qgroup_update_reserve, - TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, + TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, s64 diff, int type), TP_ARGS(fs_info, qgroup, diff, type), @@ -1851,7 +1851,7 @@ TRACE_EVENT(qgroup_update_reserve, TRACE_EVENT(qgroup_meta_reserve, - TP_PROTO(struct btrfs_root *root, s64 diff, int type), + TP_PROTO(const struct btrfs_root *root, s64 diff, int type), TP_ARGS(root, diff, type), @@ -1874,7 +1874,7 @@ TRACE_EVENT(qgroup_meta_reserve, TRACE_EVENT(qgroup_meta_convert, - TP_PROTO(struct btrfs_root *root, s64 diff), + TP_PROTO(const struct btrfs_root *root, s64 diff), TP_ARGS(root, diff), -- cgit v1.2.3 From 29aeb4e8918e6aeeaf0bb7a03b000a73596d54a3 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Fri, 26 Jul 2024 16:13:24 +0530 Subject: Bluetooth: Add a helper function to extract iso header Add a helper function hci_iso_hdr() to extract iso header from skb. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d1d073089f38..bab1e3d7452a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2901,6 +2901,11 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) return (struct hci_sco_hdr *) skb->data; } +static inline struct hci_iso_hdr *hci_iso_hdr(const struct sk_buff *skb) +{ + return (struct hci_iso_hdr *)skb->data; +} + /* Command opcode pack/unpack */ #define hci_opcode_pack(ogf, ocf) ((__u16) ((ocf & 0x03ff)|(ogf << 10))) #define hci_opcode_ogf(op) (op >> 10) -- cgit v1.2.3 From 2fcb7936cef3a045ced9d5b8cdb644bced9f99b5 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 19 Aug 2024 21:52:11 +0800 Subject: Bluetooth: L2CAP: Remove unused declarations Commit e7b02296fb40 ("Bluetooth: Remove BT_HS") removed the implementations but leave declarations. 
Signed-off-by: Yue Haibing Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5cfdc813491a..313d0b972e06 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -968,10 +968,6 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data); void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); -void l2cap_move_start(struct l2cap_chan *chan); -void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, - u8 status); -void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); -- cgit v1.2.3 From d47da6bd4cfa982fe903f33423b9e2ec541e9496 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 30 Aug 2024 17:29:27 -0400 Subject: Bluetooth: hci_core: Fix sending MGMT_EV_CONNECT_FAILED If HCI_CONN_MGMT_CONNECTED has been set then the event shall be HCI_CONN_MGMT_DISCONNECTED. Fixes: b644ba336997 ("Bluetooth: Update device_connected and device_found events to latest API") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1a32e602630e..88265d37aa72 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2257,8 +2257,8 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); -void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 status); +void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, + u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); -- cgit v1.2.3 From b2d4da31a1f40b05a61076efd4c79b88439003b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 2 Aug 2024 09:56:28 -0400 Subject: drm: new helper: drm_gem_prime_handle_to_dmabuf() Once something had been put into descriptor table, the only thing you can do with it is returning descriptor to userland - you can't withdraw it on subsequent failure exit, etc. You certainly can't count upon it staying in the same slot of descriptor table - another thread could've played with close(2)/dup2(2)/whatnot. drm_gem_prime_handle_to_fd() creates a dmabuf, allocates a descriptor and attaches dmabuf's file to it (the last two steps are done in dma_buf_fd()). That's nice when all you are going to do is passing a descriptor to userland. If you just need to work with the resulting object or have something else to be done that might fail, drm_gem_prime_handle_to_fd() is racy. The problem is analogous to one with anon_inode_getfd(), and solution is similar to what anon_inode_getfile() provides. Add drm_gem_prime_handle_to_dmabuf() - the "set dmabuf up" parts of drm_gem_prime_handle_to_fd() without the descriptor-related ones. Instead of inserting into descriptor table and returning the file descriptor it just returns the struct file. drm_gem_prime_handle_to_fd() becomes a wrapper for it. 
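A sketch of the calling pattern this enables (error paths trimmed; example_export() and its surrounding plumbing are assumed here, not taken from the patch): obtain the dma-buf first, do everything that can still fail, and only then publish a descriptor.

    #include <linux/dma-buf.h>
    #include <linux/err.h>
    #include <drm/drm_prime.h>

    static int example_export(struct drm_device *dev, struct drm_file *file_priv,
                              u32 handle, u32 flags)
    {
            struct dma_buf *dmabuf;
            int fd;

            dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags);
            if (IS_ERR(dmabuf))
                    return PTR_ERR(dmabuf);

            /*
             * Work that may still fail goes here; only a dma_buf reference is
             * held and nothing has been exposed to user space yet.
             */

            fd = dma_buf_fd(dmabuf, flags);
            if (fd < 0) {
                    /* No descriptor was installed, so the reference is still ours. */
                    dma_buf_put(dmabuf);
                    return fd;
            }
            return fd;
    }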
Other users will be introduced in the next commit. Acked-by: Thomas Zimmermann Signed-off-by: Al Viro Signed-off-by: Alex Deucher --- include/drm/drm_prime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 2a1d01e5b56b..fa085c44d4ca 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf); int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags); int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); -- cgit v1.2.3 From 070a5e6295e88a2b75cbfef04ff8b4ec05775e6d Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Fri, 6 Sep 2024 22:30:06 +0800 Subject: net: stmmac: move stmmac_fpe_cfg to stmmac_priv data By moving the fpe_cfg field to the stmmac_priv data, stmmac_fpe_cfg becomes platform-data eventually, instead of a run-time config. Suggested-by: Serge Semin Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Vladimir Oltean Reviewed-by: Serge Semin Link: https://patch.msgid.link/d9b3d7ecb308c5e39778a4c8ae9df288a2754379.1725631883.git.0x1207@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 338991c08f00..d79ff252cfdc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -138,33 +138,6 @@ struct stmmac_txq_cfg { int tbs_en; }; -/* FPE link state */ -enum stmmac_fpe_state { - FPE_STATE_OFF = 0, - FPE_STATE_CAPABLE = 1, - FPE_STATE_ENTERING_ON = 2, - FPE_STATE_ON = 3, -}; - -/* FPE link-partner hand-shaking mPacket type */ -enum stmmac_mpacket_type { - MPACKET_VERIFY = 0, - MPACKET_RESPONSE = 1, -}; - -enum stmmac_fpe_task_state_t { - __FPE_REMOVING, - __FPE_TASK_SCHED, -}; - -struct stmmac_fpe_cfg { - bool enable; /* FPE enable */ - bool hs_enable; /* FPE handshake enable */ - enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ - enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ - u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ -}; - struct stmmac_safety_feature_cfg { u32 tsoee; u32 mrxpee; @@ -232,7 +205,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From be8e9eb3750639aa5cffb3f764ca080caed41bd0 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 9 Sep 2024 09:56:11 +0800 Subject: net-timestamp: introduce SOF_TIMESTAMPING_OPT_RX_FILTER flag introduce a new flag SOF_TIMESTAMPING_OPT_RX_FILTER in the receive path. User can set it with SOF_TIMESTAMPING_SOFTWARE to filter out rx software timestamp report, especially after a process turns on netstamp_needed_key which can time stamp every incoming skb. Previously, we found out if an application starts first which turns on netstamp_needed_key, then another one only passing SOF_TIMESTAMPING_SOFTWARE could also get rx timestamp. Now we handle this case by introducing this new flag without breaking users. 
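For illustration only (not part of this patch), a user-space consumer might request software TX timestamps while filtering out RX software timestamp reports roughly like this, assuming the installed UAPI headers already define the new flag:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

/* Illustrative sketch: SW TX timestamps on, RX SW timestamp reports filtered. */
static int example_enable_tx_only_sw_timestamps(int fd)
{
	unsigned int val = SOF_TIMESTAMPING_SOFTWARE |
			   SOF_TIMESTAMPING_TX_SOFTWARE |
			   SOF_TIMESTAMPING_OPT_RX_FILTER;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0) {
		perror("setsockopt(SO_TIMESTAMPING)");
		return -1;
	}
	return 0;
}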
Quoting Willem to explain why we need the flag: "why a process would want to request software timestamp reporting, but not receive software timestamp generation. The only use I see is when the application does request SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_SOFTWARE." Similarly, this new flag could also be used for hardware case where we can set it with SOF_TIMESTAMPING_RAW_HARDWARE, then we won't receive hardware receive timestamp. Another thing about errqueue in this patch I have a few words to say: In this case, we need to handle the egress path carefully, or else reporting the tx timestamp will fail. Egress path and ingress path will finally call sock_recv_timestamp(). We have to distinguish them. Errqueue is a good indicator to reflect the flow direction. Suggested-by: Willem de Bruijn Signed-off-by: Jason Xing Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240909015612.3856-2-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_tstamp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..858339d1c1c4 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -32,8 +32,9 @@ enum { SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), + SOF_TIMESTAMPING_OPT_RX_FILTER = (1 << 17), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_RX_FILTER, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; -- cgit v1.2.3 From d591f6804e7e1310881c9224d72247a2b65039af Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Aug 2024 18:48:46 -0500 Subject: PCI: Wait for device readiness with Configuration RRS After a device reset, delays are required before the device can successfully complete config accesses. PCIe r6.0, sec 6.6, specifies some delays required before software can perform config accesses. Devices that require more time after those delays may respond to config accesses with Configuration Request Retry Status (RRS) completions. Callers of pci_dev_wait() are responsible for delays until the device can respond to config accesses. pci_dev_wait() waits any additional time until the device can successfully complete config accesses. Reading config space of devices that are not present or not ready typically returns ~0 (PCI_ERROR_RESPONSE). Previously we polled the Command register until we got a value other than ~0. This is sometimes a problem because Root Complex handling of RRS completions may include several retries and implementation-specific behavior that is invisible to software (see sec 2.3.2), so the exponential backoff in pci_dev_wait() may not work as intended. Linux enables Configuration RRS Software Visibility on all Root Ports that support it. If it is enabled, read the Vendor ID instead of the Command register. RRS completions cause immediate return of the 0x0001 reserved Vendor ID value, so the pci_dev_wait() backoff works correctly. When a read of Vendor ID eventually completes successfully by returning a non-0x0001 value (the Vendor ID or 0xffff for VFs), the device should be initialized and ready to respond to config requests. For conventional PCI devices or devices below Root Ports that don't support Configuration RRS Software Visibility, poll the Command register as before. 
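For illustration only (not from this patch), the Vendor ID polling described above boils down to something like the following sketch; the real logic lives in pci_dev_wait() in drivers/pci/pci.c and differs in detail, and the helper name and timeout handling here are made up:

#include <linux/pci.h>
#include <linux/delay.h>

/* Illustrative sketch of waiting out RRS completions via Vendor ID reads. */
static int example_wait_after_reset(struct pci_dev *dev, int timeout_ms)
{
	int delay = 1;
	u32 id;

	for (;;) {
		pci_read_config_dword(dev, PCI_VENDOR_ID, &id);
		/* An RRS completion shows up as the reserved Vendor ID 0x0001. */
		if ((id & 0xffff) != 0x0001)
			return 0;	/* device answered the config read */

		if (delay > timeout_ms)
			return -ENOTTY;

		msleep(delay);
		delay *= 2;		/* exponential backoff, as in pci_dev_wait() */
	}
}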
This was developed independently, but is very similar to Stanislav Spassov's previous work at https://lore.kernel.org/linux-pci/20200223122057.6504-1-stanspas@amazon.com Link: https://lore.kernel.org/r/20240827234848.4429-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Tested-by: Duc Dang --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..121d8d94d6d0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,6 +371,7 @@ struct pci_dev { can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pinned:1; /* Whether this dev is pinned */ + unsigned int config_crs_sv:1; /* Config CRS software visibility */ unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ -- cgit v1.2.3 From 87f10faf166a9114aa0d4132298cad379de16fdd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Aug 2024 18:48:48 -0500 Subject: PCI: Rename CRS Completion Status to RRS PCIe r6.0 changed the abbreviation for "Configuration Request Retry Status" Completion Status from "CRS" to "RRS" and uses the terminology of "Configuration RRS Software Visibility" instead of "CRS Software Visibility". Align the Linux usage with the r6.0 spec language. No functional change intended. It's confusing to make this change, but I think "RRS" *is* a better abbreviation because it was easy to interpret "CRS" as "Completion Retry Status", which really didn't make any sense. Link: https://lore.kernel.org/r/20240827234848.4429-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- include/linux/bcma/bcma_driver_pci.h | 2 +- include/linux/pci.h | 2 +- include/uapi/linux/pci_regs.h | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 68da8dba5162..dba41b65ae0d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -203,7 +203,7 @@ struct pci_dev; #define BCMA_CORE_PCI_MDIO_RXCTRL0 0x840 /* PCIE Root Capability Register bits (Host mode only) */ -#define BCMA_CORE_PCI_RC_CRS_VISIBILITY 0x0001 +#define BCMA_CORE_PCI_RC_RRS_VISIBILITY 0x0001 struct bcma_drv_pci; struct bcma_bus; diff --git a/include/linux/pci.h b/include/linux/pci.h index 121d8d94d6d0..27d8ce977674 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,7 +371,7 @@ struct pci_dev { can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pinned:1; /* Whether this dev is pinned */ - unsigned int config_crs_sv:1; /* Config CRS software visibility */ + unsigned int config_rrs_sv:1; /* Config RRS software visibility */ unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 94c00996e633..f94591f9f5e9 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -634,9 +634,11 @@ #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ -#define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ 
+#define PCI_EXP_RTCTL_RRS_SVE 0x0010 /* Config RRS Software Visibility Enable */ +#define PCI_EXP_RTCTL_CRSSVE PCI_EXP_RTCTL_RRS_SVE /* compatibility */ #define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ -#define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +#define PCI_EXP_RTCAP_RRS_SV 0x0001 /* Config RRS Software Visibility */ +#define PCI_EXP_RTCAP_CRSVIS PCI_EXP_RTCAP_RRS_SV /* compatibility */ #define PCI_EXP_RTSTA 0x20 /* Root Status */ #define PCI_EXP_RTSTA_PME_RQ_ID 0x0000ffff /* PME Requester ID */ #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ -- cgit v1.2.3 From 884ee6dc85b959bc152f15bca80c30f06069e6c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Sep 2024 14:27:24 +0800 Subject: f2fs: get rid of online repair on corrupted directory syzbot reports an f2fs bug as below: kernel BUG at fs/f2fs/inode.c:896! RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Call Trace: evict+0x532/0x950 fs/inode.c:704 dispose_list fs/inode.c:747 [inline] evict_inodes+0x5f9/0x690 fs/inode.c:797 generic_shutdown_super+0x9d/0x2d0 fs/super.c:627 kill_block_super+0x44/0x90 fs/super.c:1696 kill_f2fs_super+0x344/0x690 fs/f2fs/super.c:4898 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:228 ptrace_notify+0x2d2/0x380 kernel/signal.c:2402 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc6/0x190 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x279/0x370 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Online repair of a corrupted directory in f2fs_lookup() can generate dirty data/meta while racing with a read-only remount; it may leave a dirty inode after the filesystem becomes read-only. However, checkpoint() skips flushing dirty inodes in read-only mode, resulting in the above panic. Let's get rid of online repair in f2fs_lookup(), and leave the work to fsck.f2fs.
Fixes: 510022a85839 ("f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries") Reported-by: syzbot+ebea2790904673d7c618@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a7b20f061ff2d56a@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f17f89a259e2..b0b821edfd97 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -278,7 +278,7 @@ struct node_footer { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ -- cgit v1.2.3 From 540c830212edc908361c39d40df21772a8bda308 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2024 20:30:18 +0000 Subject: firmware: imx: remove duplicate scmi_imx_misc_ctrl_get() These two functions have a stub definition when CONFIG_IMX_SCMI_MISC_EXT is not set, which conflict with the global definition: In file included from drivers/firmware/imx/sm-misc.c:6: include/linux/firmware/imx/sm.h:30:1: error: expected identifier or '(' before '{' token 30 | { | ^ drivers/firmware/imx/sm-misc.c:26:5: error: redefinition of 'scmi_imx_misc_ctrl_get' 26 | int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) | ^~~~~~~~~~~~~~~~~~~~~~ include/linux/firmware/imx/sm.h:24:19: note: previous definition of 'scmi_imx_misc_ctrl_get' with type 'int(u32, u32 *, u32 *)' {aka 'int(unsigned int, unsigned int *, unsigned int *)'} 24 | static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) | ^~~~~~~~~~~~~~~~~~~~~~ There is no real need for the #ifdef, and removing this avoids the build failure. Fixes: 0b4f8a68b292 ("firmware: imx: Add i.MX95 MISC driver") Link: https://lore.kernel.org/r/20240909203023.1275232-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- include/linux/firmware/imx/sm.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 62a2690e2abd..9b85a3f028d1 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -17,18 +17,7 @@ #define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ #define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ -#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_EXT) int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); -#else -static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) -{ - return -EOPNOTSUPP; -} -static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val); -{ - return -EOPNOTSUPP; -} -#endif #endif -- cgit v1.2.3 From 6ebf2d021a13a77b495b4de15c6834e26b80d08e Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Tue, 13 Aug 2024 15:43:46 +0100 Subject: sched/deadline: Clarify nanoseconds in uapi Specify the time values of the deadline parameters of deadline, runtime, and period as being in nanoseconds explicitly as they always have been. Signed-off-by: Christian Loehle Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20240813144348.1180344-3-christian.loehle@arm.com --- include/uapi/linux/sched/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index 90662385689b..bf6e9ae031c1 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -58,9 +58,9 @@ * * This is reflected by the following fields of the sched_attr structure: * - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period + * @sched_deadline representative of the task's deadline in nanoseconds + * @sched_runtime representative of the task's runtime in nanoseconds + * @sched_period representative of the task's period in nanoseconds * * Given this task model, there are a multiplicity of scheduling algorithms * and policies, that can be used to ensure all the tasks will make their -- cgit v1.2.3 From 4b3fc475c61fa8733a9acc9d743f6cc9ca4915f5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 6 Sep 2024 16:05:07 +0530 Subject: net: phylink: Add phylink_set_fixed_link() to configure fixed link state in phylink The function allows for the configuration of a fixed link state for a given phylink instance. This addition is particularly useful for network devices that operate with a fixed link configuration, where the link parameters do not change dynamically. By using `phylink_set_fixed_link()`, drivers can easily set up the fixed link state during initialization or configuration changes. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2381e07429a2..5c01048860c4 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -598,6 +598,8 @@ int phylink_fwnode_phy_connect(struct phylink *pl, const struct fwnode_handle *fwnode, u32 flags); void phylink_disconnect_phy(struct phylink *); +int phylink_set_fixed_link(struct phylink *, + const struct phylink_link_state *); void phylink_mac_change(struct phylink *, bool up); void phylink_pcs_change(struct phylink_pcs *, bool up); -- cgit v1.2.3 From cef7dde8836ab09a3bfe96ada4f18ef2496eacc9 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:57 +0300 Subject: net/mlx5: Expand mkey page size to support 6 bits Protect the usage of the 6th bit with the relevant capability to ensure we are using the new page sizes with FW that supports the bit extension. 
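As a rough sketch of what such gating could look like (not the actual driver code; the helper below is made up, while MLX5_CAP_GEN_2() and MLX5_SET() are existing mlx5 accessors):

#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>

/* Illustrative sketch: only use the 6th log_page_size bit when FW supports it. */
static void example_set_mkc_log_page_size(struct mlx5_core_dev *mdev,
					  void *mkc, u32 log_page_size)
{
	/* Values above 31 need the extended 6-bit field. */
	if (log_page_size > 31 && !MLX5_CAP_GEN_2(mdev, umr_log_entity_size_5))
		log_page_size = 31;

	MLX5_SET(mkc, mkc, log_page_size, log_page_size);
}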
Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-2-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 970c9d8473ef..ec1117d4e441 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1988,7 +1988,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migratable[0x1]; u8 reserved_at_81[0x11]; u8 query_vuid[0x1]; - u8 reserved_at_93[0xd]; + u8 reserved_at_93[0x5]; + u8 umr_log_entity_size_5[0x1]; + u8 reserved_at_99[0x7]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -4212,8 +4214,7 @@ struct mlx5_ifc_mkc_bits { u8 reserved_at_1c0[0x19]; u8 relaxed_ordering_read[0x1]; - u8 reserved_at_1d9[0x1]; - u8 log_page_size[0x5]; + u8 log_page_size[0x6]; u8 reserved_at_1e0[0x20]; }; -- cgit v1.2.3 From 6cd9171d04cff79abe78c166927ab8563bf95fe5 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:58 +0300 Subject: net/mlx5: Expose HW bits for Memory scheme ODP Expose IFC bits to support the new memory scheme on demand paging. Change the macro reading odp capabilities to be able to read from the new IFC layout and align the code in upper layers to be compiled. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-3-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 4 +++ include/linux/mlx5/mlx5_ifc.h | 57 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ba875a619b97..bd081f276654 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1369,6 +1369,10 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) +#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + transport_page_fault_scheme_cap.cap) + #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec1117d4e441..3e3336bb9191 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1326,11 +1326,13 @@ struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_e0[0x720]; }; -struct mlx5_ifc_odp_cap_bits { +struct mlx5_ifc_odp_scheme_cap_bits { u8 reserved_at_0[0x40]; u8 sig[0x1]; - u8 reserved_at_41[0x1f]; + u8 reserved_at_41[0x4]; + u8 page_prefetch[0x1]; + u8 reserved_at_46[0x1a]; u8 reserved_at_60[0x20]; @@ -1344,7 +1346,20 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps; - u8 reserved_at_120[0x6E0]; + u8 reserved_at_120[0xe0]; +}; + +struct mlx5_ifc_odp_cap_bits { + struct mlx5_ifc_odp_scheme_cap_bits transport_page_fault_scheme_cap; + + struct mlx5_ifc_odp_scheme_cap_bits memory_page_fault_scheme_cap; + + u8 reserved_at_400[0x200]; + + u8 mem_page_fault[0x1]; + u8 reserved_at_601[0x1f]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_tls_cap_bits { @@ -2034,7 +2049,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0x10]; + u8 reserved_at_3a0[0xa]; + u8 max_mkey_log_entity_size_mtt[0x6]; u8 max_rqt_vhca_id[0x10]; u8 
reserved_at_3c0[0x20]; @@ -7258,6 +7274,30 @@ struct mlx5_ifc_qp_2err_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_trans_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0x4]; + u8 page_fault_type[0x3]; + u8 wq_number[0x18]; + + u8 reserved_at_20[0x8]; + u8 fault_token[0x18]; +}; + +struct mlx5_ifc_mem_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0xf]; + u8 fault_token_47_32[0x10]; + + u8 fault_token_31_0[0x20]; +}; + +union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits { + struct mlx5_ifc_trans_page_fault_info_bits trans_page_fault_info; + struct mlx5_ifc_mem_page_fault_info_bits mem_page_fault_info; + u8 reserved_at_0[0x40]; +}; + struct mlx5_ifc_page_fault_resume_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7274,13 +7314,8 @@ struct mlx5_ifc_page_fault_resume_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 error[0x1]; - u8 reserved_at_41[0x4]; - u8 page_fault_type[0x3]; - u8 wq_number[0x18]; - - u8 reserved_at_60[0x8]; - u8 token[0x18]; + union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits + page_fault_info; }; struct mlx5_ifc_nop_out_bits { -- cgit v1.2.3 From 64c68385a39bb676c76da36164ab696e8da78842 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:04:59 +0300 Subject: RDMA/mlx5: Add new ODP memory scheme eqe format Add new fields to support the new memory scheme page fault and extend the token field to u64 as in the new scheme the token is 48 bit. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-4-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bd081f276654..154095256d0d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -211,6 +211,7 @@ enum { enum { MLX5_PFAULT_SUBTYPE_WQE = 0, MLX5_PFAULT_SUBTYPE_RDMA = 1, + MLX5_PFAULT_SUBTYPE_MEMORY = 2, }; enum wqe_page_fault_type { @@ -646,10 +647,11 @@ struct mlx5_eqe_page_req { __be32 rsvd1[5]; }; +#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096 struct mlx5_eqe_page_fault { - __be32 bytes_committed; union { struct { + __be32 bytes_committed; u16 reserved1; __be16 wqe_index; u16 reserved2; @@ -659,6 +661,7 @@ struct mlx5_eqe_page_fault { __be32 pftype_wq; } __packed wqe; struct { + __be32 bytes_committed; __be32 r_key; u16 reserved1; __be16 packet_length; @@ -666,6 +669,23 @@ struct mlx5_eqe_page_fault { __be64 rdma_va; __be32 pftype_token; } __packed rdma; + struct { + u8 flags; + u8 reserved1; + __be16 post_demand_fault_pages; + __be16 pre_demand_fault_pages; + __be16 token47_32; + __be32 token31_0; + /* + * FW changed from specifying the fault size in byte + * count to 4k pages granularity. The size specified + * in pages uses bits 31:12, to keep backward + * compatibility. + */ + __be32 demand_fault_pages; + __be32 mkey; + __be64 va; + } __packed memory; } __packed; } __packed; -- cgit v1.2.3 From c912ac66b3fc92acb0079fff7d030e9be0756c36 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 9 Sep 2024 15:41:05 +0300 Subject: platform/x86: intel_scu_ipc: Move intel_scu_ipc.h out of arch/x86/include/asm This is a platform/x86 library that is mostly being used by other drivers not directly under arch/x86 anyway (with the exception of the Intel MID setup code) so it makes sense that it lives under the platform_data/x86/ directory instead. No functional changes intended. 
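For users of the library the change is only the include path; illustratively:

/* Before (header lived in arch/x86/include/asm): */
#include <asm/intel_scu_ipc.h>

/* After this change: */
#include <linux/platform_data/x86/intel_scu_ipc.h>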
Suggested-by: Andy Shevchenko Signed-off-by: Mika Westerberg Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240909124952.1152017-3-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/intel_scu_ipc.h | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 include/linux/platform_data/x86/intel_scu_ipc.h (limited to 'include') diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h new file mode 100644 index 000000000000..0ca9962e97f2 --- /dev/null +++ b/include/linux/platform_data/x86/intel_scu_ipc.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_ +#define __PLATFORM_X86_INTEL_SCU_IPC_H_ + +#include + +struct device; +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) +{ + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); +} + +#endif -- cgit v1.2.3 From 5f1cda51107fd5e53cd012bd1f6f39aede96bdbe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Sep 2024 15:41:06 +0300 Subject: platform/x86: intel_scu_wdt: Move intel_scu_wdt.h to x86 subfolder This is a platform/x86 library that can only be used on x86 devices. so it makes sense that it lives under the platform_data/x86/ directory instead. No functional changes intended. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240909124952.1152017-4-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/intel-mid_wdt.h | 19 ------------------- include/linux/platform_data/x86/intel-mid_wdt.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 include/linux/platform_data/intel-mid_wdt.h create mode 100644 include/linux/platform_data/x86/intel-mid_wdt.h (limited to 'include') diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/intel-mid_wdt.h deleted file mode 100644 index 8dba70b4b020..000000000000 --- a/include/linux/platform_data/intel-mid_wdt.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * intel-mid_wdt: generic Intel MID SCU watchdog driver - * - * Copyright (C) 2014 Intel Corporation. All rights reserved. - * Contact: David Cohen - */ - -#ifndef __INTEL_MID_WDT_H__ -#define __INTEL_MID_WDT_H__ - -#include - -struct intel_mid_wdt_pdata { - int irq; - int (*probe)(struct platform_device *pdev); -}; - -#endif /*__INTEL_MID_WDT_H__*/ diff --git a/include/linux/platform_data/x86/intel-mid_wdt.h b/include/linux/platform_data/x86/intel-mid_wdt.h new file mode 100644 index 000000000000..e5c0210d0fec --- /dev/null +++ b/include/linux/platform_data/x86/intel-mid_wdt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * intel-mid_wdt: generic Intel MID SCU watchdog driver + * + * Copyright (C) 2014 Intel Corporation. All rights reserved. + * Contact: David Cohen + */ + +#ifndef __PLATFORM_X86_INTEL_MID_WDT_H_ +#define __PLATFORM_X86_INTEL_MID_WDT_H_ + +#include + +struct intel_mid_wdt_pdata { + int irq; + int (*probe)(struct platform_device *pdev); +}; + +#endif /* __PLATFORM_X86_INTEL_MID_WDT_H_ */ -- cgit v1.2.3 From d3bfbfb1248498656cd25c51e41c1e31219bd0dd Mon Sep 17 00:00:00 2001 From: Kundan Kumar Date: Wed, 11 Sep 2024 12:19:34 +0530 Subject: mm: release number of pages of a folio Add a new function unpin_user_folio() to put the refs of a folio by npages count. The check for BIO_PAGE_PINNED flag is removed as it is already checked in bio_release_pages(). Signed-off-by: Kundan Kumar Tested-by: Luis Chamberlain Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240911064935.5630-4-kundan.kumar@samsung.com Signed-off-by: Jens Axboe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..2fd88cd5997d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1597,6 +1597,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); +void unpin_user_folio(struct folio *folio, unsigned long npages); void unpin_folios(struct folio **folios, unsigned long nfolios); static inline bool is_cow_mapping(vm_flags_t flags) -- cgit v1.2.3 From b03ffc76b83c1a7d058454efbcf1bf0e345ef1c2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 6 Sep 2024 15:13:31 +0200 Subject: soc: qcom: geni-se: add GP_LENGTH/IRQ_EN_SET/IRQ_EN_CLEAR registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For UART devices the M_GP_LENGTH is the TX word count. For other devices this is the transaction word count. 
For UART devices the S_GP_LENGTH is the RX word count. The IRQ_EN set/clear registers allow you to set or clear bits in the IRQ_EN register without needing a read-modify-write. Acked-by: Bjorn Andersson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240610152420.v4.1.Ife7ced506aef1be3158712aa3ff34a006b973559@changeid Tested-by: Nícolas F. R. A. Prado Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240906131336.23625-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/soc/qcom/geni-se.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 0f038a1a0330..c3bca9c0bf2c 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -88,11 +88,15 @@ struct geni_se { #define SE_GENI_M_IRQ_STATUS 0x610 #define SE_GENI_M_IRQ_EN 0x614 #define SE_GENI_M_IRQ_CLEAR 0x618 +#define SE_GENI_M_IRQ_EN_SET 0x61c +#define SE_GENI_M_IRQ_EN_CLEAR 0x620 #define SE_GENI_S_CMD0 0x630 #define SE_GENI_S_CMD_CTRL_REG 0x634 #define SE_GENI_S_IRQ_STATUS 0x640 #define SE_GENI_S_IRQ_EN 0x644 #define SE_GENI_S_IRQ_CLEAR 0x648 +#define SE_GENI_S_IRQ_EN_SET 0x64c +#define SE_GENI_S_IRQ_EN_CLEAR 0x650 #define SE_GENI_TX_FIFOn 0x700 #define SE_GENI_RX_FIFOn 0x780 #define SE_GENI_TX_FIFO_STATUS 0x800 @@ -101,6 +105,8 @@ struct geni_se { #define SE_GENI_RX_WATERMARK_REG 0x810 #define SE_GENI_RX_RFR_WATERMARK_REG 0x814 #define SE_GENI_IOS 0x908 +#define SE_GENI_M_GP_LENGTH 0x910 +#define SE_GENI_S_GP_LENGTH 0x914 #define SE_DMA_TX_IRQ_STAT 0xc40 #define SE_DMA_TX_IRQ_CLR 0xc44 #define SE_DMA_TX_FSM_RST 0xc58 @@ -234,6 +240,9 @@ struct geni_se { #define IO2_DATA_IN BIT(1) #define RX_DATA_IN BIT(0) +/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */ +#define GP_LENGTH GENMASK(31, 0) + /* SE_DMA_TX_IRQ_STAT Register fields */ #define TX_DMA_DONE BIT(0) #define TX_EOT BIT(1) -- cgit v1.2.3 From 4c59c59ef3ab9275f2a8f84dcffbd16c285ce8ea Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 11 Sep 2024 13:02:11 +0800 Subject: tty: serial: samsung: Use bit manipulation macros for APPLE_S5L_* New entries using BIT() will be added soon, so change the existing ones to use bit manipulation macros including BIT() and GENMASK() for consistency. 
Suggested-by: Krzysztof Kozlowski Tested-by: Janne Grunau Reviewed-by: Neal Gompa Signed-off-by: Nick Chan Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20240911050741.14477-2-towinchenmi@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 1672cf0810ef..2a934e20ca4b 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -249,9 +249,9 @@ #define APPLE_S5L_UCON_RXTO_ENA 9 #define APPLE_S5L_UCON_RXTHRESH_ENA 12 #define APPLE_S5L_UCON_TXTHRESH_ENA 13 -#define APPLE_S5L_UCON_RXTO_ENA_MSK (1 << APPLE_S5L_UCON_RXTO_ENA) -#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_RXTHRESH_ENA) -#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_TXTHRESH_ENA) +#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) #define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ S3C2410_UCON_RXIRQMODE | \ @@ -260,10 +260,10 @@ APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \ APPLE_S5L_UCON_TXTHRESH_ENA_MSK) -#define APPLE_S5L_UTRSTAT_RXTHRESH (1<<4) -#define APPLE_S5L_UTRSTAT_TXTHRESH (1<<5) -#define APPLE_S5L_UTRSTAT_RXTO (1<<9) -#define APPLE_S5L_UTRSTAT_ALL_FLAGS (0x3f0) +#define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4) +#define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5) +#define APPLE_S5L_UTRSTAT_RXTO BIT(9) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 4) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 5ed771f174726ae879945d4f148a9005ac909cb7 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Wed, 11 Sep 2024 13:02:13 +0800 Subject: tty: serial: samsung: Fix serial rx on Apple A7-A9 Apple's older A7-A9 SoCs seems to use bit 3 in UTRSTAT as RXTO, which is enabled by bit 11 in UCON. Access these bits in addition to the original RXTO and RXTO enable bits, to allow serial rx to function on A7-A9 SoCs. This change does not appear to affect the A10 SoC and up. 
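As a hedged sketch (not the driver change itself), an interrupt path that wants to treat either receive-timeout status bit as the same event could simply check both flags:

#include <linux/serial_s3c.h>

/* Illustrative: handle both the legacy (A7-A9) and newer RX-timeout bits. */
static inline bool example_s5l_rx_timeout(u32 utrstat)
{
	return utrstat & (APPLE_S5L_UTRSTAT_RXTO |
			  APPLE_S5L_UTRSTAT_RXTO_LEGACY);
}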
Tested-by: Janne Grunau Reviewed-by: Neal Gompa Signed-off-by: Nick Chan Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20240911050741.14477-4-towinchenmi@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 2a934e20ca4b..102aa33d956c 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -246,24 +246,28 @@ S5PV210_UFCON_TXTRIG4 | \ S5PV210_UFCON_RXTRIG4) -#define APPLE_S5L_UCON_RXTO_ENA 9 -#define APPLE_S5L_UCON_RXTHRESH_ENA 12 -#define APPLE_S5L_UCON_TXTHRESH_ENA 13 -#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) -#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) -#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) +#define APPLE_S5L_UCON_RXTO_ENA 9 +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA 11 +#define APPLE_S5L_UCON_RXTHRESH_ENA 12 +#define APPLE_S5L_UCON_TXTHRESH_ENA 13 +#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_LEGACY_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) #define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ S3C2410_UCON_RXIRQMODE | \ S3C2410_UCON_RXFIFO_TOI) #define APPLE_S5L_UCON_MASK (APPLE_S5L_UCON_RXTO_ENA_MSK | \ + APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK | \ APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \ APPLE_S5L_UCON_TXTHRESH_ENA_MSK) +#define APPLE_S5L_UTRSTAT_RXTO_LEGACY BIT(3) #define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4) #define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5) #define APPLE_S5L_UTRSTAT_RXTO BIT(9) -#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 4) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 3) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 9a26234423b87e111351eac0e95775e6ba14d752 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Sep 2024 15:57:52 +0200 Subject: ALSA: pcm: Fix breakage of PCM rates used for topology It turned out that the topology ABI takes the standard PCM rate bits as is, and it means that the recent change of the PCM rate bits would lead to the inconsistent rate values used for topology. This patch reverts the original PCM rate bit definitions while adding the new rates to the extended bits instead. This needed the change of snd_pcm_known_rates, too. And this also required to fix the handling in snd_pcm_hw_limit_rates() that blindly assumed that the list is sorted while it became unsorted now. 
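To make the "unsorted list" point concrete, here is a hedged sketch of the idea behind the snd_pcm_hw_limit_rates() fix (the real code in sound/core/pcm_misc.c differs): derive the minimum and maximum by scanning every set rate bit instead of trusting the first and last list entries.

#include <sound/pcm.h>
#include <linux/limits.h>
#include <linux/minmax.h>

/* Illustrative: compute rate_min/rate_max from a possibly unsorted rate table. */
static void example_limit_rates(const struct snd_pcm_hw_constraint_list *list,
				unsigned int rates, unsigned int *rate_min,
				unsigned int *rate_max)
{
	unsigned int i;

	*rate_min = UINT_MAX;
	*rate_max = 0;

	for (i = 0; i < list->count; i++) {
		if (!(rates & (1U << i)))
			continue;
		*rate_min = min(*rate_min, list->list[i]);
		*rate_max = max(*rate_max, list->list[i]);
	}
}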
Fixes: 090624b7dc83 ("ALSA: pcm: add more sample rate definitions") Reported-by: Pierre-Louis Bossart Closes: https://lore.kernel.org/1ab3efaa-863c-4dd0-8f81-b50fd9775fad@linux.intel.com Reviewed-by: Jaroslav Kysela Tested-by: Jerome Brunet Tested-by: Bard Liao Link: https://patch.msgid.link/20240911135756.24434-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c993350975a9..0bf7d25434d7 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -109,23 +109,24 @@ struct snd_pcm_ops { #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ -#define SNDRV_PCM_RATE_12000 (1U<<3) /* 12000Hz */ -#define SNDRV_PCM_RATE_16000 (1U<<4) /* 16000Hz */ -#define SNDRV_PCM_RATE_22050 (1U<<5) /* 22050Hz */ -#define SNDRV_PCM_RATE_24000 (1U<<6) /* 24000Hz */ -#define SNDRV_PCM_RATE_32000 (1U<<7) /* 32000Hz */ -#define SNDRV_PCM_RATE_44100 (1U<<8) /* 44100Hz */ -#define SNDRV_PCM_RATE_48000 (1U<<9) /* 48000Hz */ -#define SNDRV_PCM_RATE_64000 (1U<<10) /* 64000Hz */ -#define SNDRV_PCM_RATE_88200 (1U<<11) /* 88200Hz */ -#define SNDRV_PCM_RATE_96000 (1U<<12) /* 96000Hz */ -#define SNDRV_PCM_RATE_128000 (1U<<13) /* 128000Hz */ -#define SNDRV_PCM_RATE_176400 (1U<<14) /* 176400Hz */ -#define SNDRV_PCM_RATE_192000 (1U<<15) /* 192000Hz */ -#define SNDRV_PCM_RATE_352800 (1U<<16) /* 352800Hz */ -#define SNDRV_PCM_RATE_384000 (1U<<17) /* 384000Hz */ -#define SNDRV_PCM_RATE_705600 (1U<<18) /* 705600Hz */ -#define SNDRV_PCM_RATE_768000 (1U<<19) /* 768000Hz */ +#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ +#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ +#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ +#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ +#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ +#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ +#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ +#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ +#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ +#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ +#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ +#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ +#define SNDRV_PCM_RATE_705600 (1U<<15) /* 705600Hz */ +#define SNDRV_PCM_RATE_768000 (1U<<16) /* 768000Hz */ +/* extended rates since 6.12 */ +#define SNDRV_PCM_RATE_12000 (1U<<17) /* 12000Hz */ +#define SNDRV_PCM_RATE_24000 (1U<<18) /* 24000Hz */ +#define SNDRV_PCM_RATE_128000 (1U<<19) /* 128000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ -- cgit v1.2.3 From 01ced022e125f7b328335bb2944a107afcafe351 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 3 Sep 2024 15:32:16 -0500 Subject: ACPI: CPPC: Adjust return code for inline functions in !CONFIG_ACPI_CPPC_LIB Checkpath emits the following warning: ``` WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP ``` Adjust the code accordingly. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 930b6afba6f4..409a7190a4a8 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -162,31 +162,31 @@ extern int cppc_set_auto_sel(int cpu, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_enable(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool cppc_perf_ctrs_in_pcc(void) { @@ -210,27 +210,27 @@ static inline bool cpc_ffh_supported(void) } static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_set_auto_sel(int cpu, bool enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #endif /* !CONFIG_ACPI_CPPC_LIB */ -- cgit v1.2.3 From 6c09e3b445a1a647a5b57ea6afd23e846225dd8f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:52 -0500 Subject: x86/amd: Rename amd_get_highest_perf() to amd_get_boost_ratio_numerator() The function name is ambiguous because it returns an intermediate value for calculating maximum frequency rather than the CPPC 'Highest Perf' register. Rename the function to clarify its use and allow the function to return errors. Adjust the consumer in acpi-cpufreq to catch errors. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 409a7190a4a8..97861abc5f5b 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -232,6 +233,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) +{ + return -EOPNOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit v1.2.3 From 2819bfef6483c66c55064ca678f2630a1a09f3f9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:54 -0500 Subject: x86/amd: Move amd_get_highest_perf() out of amd-pstate amd_pstate_get_highest_perf() is a helper used to get the highest perf value on AMD systems. It's used in amd-pstate as part of preferred core handling, but applicable for acpi-cpufreq as well. Move it out to cppc handling code as amd_get_highest_perf(). Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 97861abc5f5b..f7c7abf2a95e 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,6 +159,7 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) @@ -233,6 +234,10 @@ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf { return -EOPNOTSUPP; } +static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) +{ + return -ENODEV; +} static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) { return -EOPNOTSUPP; -- cgit v1.2.3 From 279f838a61f96cbfeb1f9ba060e4a452e6e041d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 26 Aug 2024 16:13:55 -0500 Subject: x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator() AMD systems that support preferred cores will use "166" as their numerator for max frequency calculations instead of "255". Add a function for detecting preferred cores by looking at the highest perf value on all cores. If preferred cores are enabled return 166 and if disabled the value in the highest perf register. As the function will be called multiple times, cache the values for the boost numerator and if preferred cores will be enabled in global variables. Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7c7abf2a95e..482e0587a041 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -161,6 +161,7 @@ extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); extern int cppc_set_auto_sel(int cpu, bool enable); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); +extern int amd_detect_prefcore(bool *detected); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -242,6 +243,10 @@ static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator { return -EOPNOTSUPP; } +static inline int amd_detect_prefcore(bool *detected) +{ + return -ENODEV; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ -- cgit v1.2.3 From edd3f6f7588c713477e1299c38c84dcd91a7f148 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Wed, 11 Sep 2024 11:37:17 +0800 Subject: tcp: Use skb__nullable in trace_tcp_send_reset Replace skb with skb__nullable as the argument name. The suffix tells bpf verifier through btf that the arg could be NULL and should be checked in tp_btf prog. For now, this is the only nullable argument in tcp tracepoints. Signed-off-by: Philo Lu Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240911033719.91468-4-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- include/trace/events/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1c8bd8e186b8..a27c4b619dff 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -91,10 +91,10 @@ DEFINE_RST_REASON(FN, FN) TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, - const struct sk_buff *skb, + const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), - TP_ARGS(sk, skb, reason), + TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) @@ -106,7 +106,7 @@ TRACE_EVENT(tcp_send_reset, ), TP_fast_assign( - __entry->skbaddr = skb; + __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; @@ -118,13 +118,13 @@ TRACE_EVENT(tcp_send_reset, const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); - } else if (skb) { - const struct tcphdr *th = (const struct tcphdr *)skb->data; + } else if (skb__nullable) { + const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ - TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr); + TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), -- cgit v1.2.3 From 6746ee4c3a189f8b60694f01e7e29bc5ff7972e0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:37 +0100 Subject: io_uring/cmd: expose iowq to cmds When an io_uring request needs blocking context we offload it to the io_uring's thread pool called io-wq. We can get there off ->uring_cmd by returning -EAGAIN, but there is no straightforward way of doing that from an asynchronous callback. Add a helper that would transfer a command to a blocking context. 
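As a rough usage sketch (not from this patch; the callback below is made up), an asynchronous completion path that realizes it needs to block can now punt the command to io-wq:

#include <linux/io_uring/cmd.h>

/* Illustrative: called from a context where blocking is not allowed. */
static void example_cmd_async_done(struct io_uring_cmd *ioucmd)
{
	/* Re-issue the command from the io-wq blocking context. */
	io_uring_cmd_issue_blocking(ioucmd);
}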
Note, we do an extra hop via task_work before io_queue_iowq(); that's a limitation of the current io_uring infrastructure, which can likely be lifted later if it ever becomes a problem. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f735f807d7c8ba50c9452c69dfe5d3e9e535037b.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 447fbfd32215..86ceb3383e49 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -48,6 +48,9 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags); +/* Execute the request from a blocking context */ +void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -67,6 +70,9 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags) { } +static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) +{ +} #endif /* -- cgit v1.2.3 From a6ccb48e13662bcb98282e051512b9686b02d353 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:38 +0100 Subject: io_uring/cmd: give inline space in request to cmds Some io_uring commands can use some inline space in io_kiocb. We have 32 bytes in struct io_uring_cmd, expose it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7ca779a61ee5e166e535d70df9c7f07b15d8a0ce.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 86ceb3383e49..c189d36ad55e 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -23,6 +23,15 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) return sqe->cmd; } +static inline void io_uring_cmd_private_sz_check(size_t cmd_sz) +{ + BUILD_BUG_ON(cmd_sz > sizeof_field(struct io_uring_cmd, pdu)); +} +#define io_uring_cmd_to_pdu(cmd, pdu_type) ( \ + io_uring_cmd_private_sz_check(sizeof(pdu_type)), \ + ((pdu_type *)&(cmd)->pdu) \ +) + #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -- cgit v1.2.3 From a12c883a0a6a005cfb3ad01feaf783e2248bfc3e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:39 +0100 Subject: filemap: introduce filemap_invalidate_pages kiocb_invalidate_pages() is useful for the write path; however, not everything is backed by a kiocb, and we want to reuse the function for the bio-based discard implementation. Extract and reuse a new helper called filemap_invalidate_pages(), which takes an argument indicating whether it should be non-blocking and might return -EAGAIN.
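A hedged sketch of how a non-kiocb caller might use it (the wrapper is made up; the signature matches the declaration below):

#include <linux/pagemap.h>

/* Illustrative: drop page cache over [pos, pos + len) before a discard-style op. */
static int example_invalidate_range(struct address_space *mapping,
				    loff_t pos, loff_t len, bool nowait)
{
	/* May return -EAGAIN only when nowait is true; retry from a blocking context. */
	return filemap_invalidate_pages(mapping, pos, pos + len - 1, nowait);
}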
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f81374b52c92d0dce0f01a279d1eed42b54056aa.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9c7edb6422b..e39c3a7ce33c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -32,6 +32,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); +int filemap_invalidate_pages(struct address_space *mapping, + loff_t pos, loff_t end, bool nowait); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); -- cgit v1.2.3 From 50c52250e2d74b098465841163c18f4b4e9ad430 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 11 Sep 2024 17:34:41 +0100 Subject: block: implement async io_uring discard cmd io_uring allows implementing custom file specific asynchronous operations via the fops->uring_cmd callback, a.k.a. IORING_OP_URING_CMD requests or just io_uring commands. Use it to add support for async discards. Normally, it first tries to queue up bios in a non-blocking context, and if that fails, we'd retry from a blocking context by returning -EAGAIN to the core io_uring. We always get the result from bios asynchronously by setting a custom bi_end_io callback, at which point we drag the request into the task context to either reissue or complete it and post a completion to the user. Unlike ioctl(BLKDISCARD) with stronger guarantees against races, we only do a best effort attempt to invalidate page cache, and it can race with any writes and reads and leave page cache stale. It's the same kind of races we allow to direct writes. Also, apart from cases where discarding is not allowed at all, e.g. discards are not supported or the file/device is read only, the user should assume that the sector range on disk is not valid anymore, even when an error was returned to the user. Suggested-by: Conrad Meyer Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2b5210443e4fa0257934f73dfafcc18a77cd0e09.1726072086.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/blkdev.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/uapi/linux/blkdev.h (limited to 'include') diff --git a/include/uapi/linux/blkdev.h b/include/uapi/linux/blkdev.h new file mode 100644 index 000000000000..66373cd1a83a --- /dev/null +++ b/include/uapi/linux/blkdev.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_BLKDEV_H +#define _UAPI_LINUX_BLKDEV_H + +#include +#include + +/* + * io_uring block file commands, see IORING_OP_URING_CMD. + * It's a different number space from ioctl(), reuse the block's code 0x12. + */ +#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0) + +#endif -- cgit v1.2.3 From 45b8fc3096542a53bfd245a9ad8ef870384b4897 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 29 Aug 2024 10:42:27 -0700 Subject: lib/buildid: rename build_id_parse() into build_id_parse_nofault() Make it clear that build_id_parse() assumes that it can take no page fault by renaming it and current few users to build_id_parse_nofault(). 
Also add build_id_parse() stub which for now falls back to the non-sleepable implementation, but will be changed in subsequent patches to take advantage of the sleepable context. PROCMAP_QUERY ioctl() on /proc/<pid>/maps file uses build_id_parse() and will automatically take advantage of the more reliable sleepable context implementation. Reviewed-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240829174232.3133883-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/buildid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 20aa3c2d89f7..014a88c41073 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -7,8 +7,8 @@ #define BUILD_ID_SIZE_MAX 20 struct vm_area_struct; -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, - __u32 *size); +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); +int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) -- cgit v1.2.3 From d4dd9775ec242425576af93daadb80a34083a53c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 29 Aug 2024 10:42:31 -0700 Subject: bpf: wire up sleepable bpf_get_stack() and bpf_get_task_stack() helpers Add sleepable implementations of bpf_get_stack() and bpf_get_task_stack() helpers and allow them to be used from sleepable BPF programs (e.g., sleepable uprobes). Note, capturing the stack trace IPs itself is not sleepable (that would need to be a separate project), only build ID fetching is sleepable and thus more reliable, as it will wait for data to be paged in, if necessary. For that we make use of the sleepable build_id_parse() implementation. Now that build ID related internals in kernel/bpf/stackmap.c can be used both in sleepable and non-sleepable contexts, we need to add additional rcu_read_lock()/rcu_read_unlock() protection around fetching perf_callchain_entry, but with the refactoring in the previous commit it's now pretty straightforward. We make sure to do rcu_read_unlock() (in sleepable mode only) right before the stack_map_get_build_id_offset() call which can sleep. By that time we don't have any more use of perf_callchain_entry. Note, bpf_get_task_stack() will fail for user mode if task != current. And for kernel mode, build IDs are irrelevant. So in that sense adding a sleepable bpf_get_task_stack() implementation is a no-op. It feels right to wire this up for symmetry and completeness, but I'm open to just dropping it until we support the `user && crosstask` condition.
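For illustration, a sleepable uprobe can now ask for build IDs and expect them to be resolved reliably. This is a hypothetical libbpf-style program sketch (the attach target, array size and includes are assumptions, not part of this patch):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("uprobe.s//usr/bin/app:main")	/* ".s" marks the program sleepable */
	int handle(struct pt_regs *ctx)
	{
		struct bpf_stack_build_id ids[20];

		/* build ID resolution may now fault file pages in instead of failing */
		bpf_get_stack(ctx, ids, sizeof(ids),
			      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
		return 0;
	}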
Reviewed-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240829174232.3133883-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6f87fb014fba..7f3e8477d5e7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3200,7 +3200,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; -- cgit v1.2.3 From 6982826fe5e53ef115836de7dd397bd970030937 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 9 Sep 2024 22:09:22 +0200 Subject: mptcp: fallback to TCP after SYN+MPC drops Some middleboxes might be nasty with MPTCP, and decide to drop packets with MPTCP options, instead of just dropping the MPTCP options (or letting them pass...). In this case, it sounds better to fallback to "plain" TCP after 2 retransmissions, and try again. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/477 Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240909-net-next-mptcp-fallback-x-mpc-v1-2-da7ebb4cd2a3@kernel.org Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 0bc4ab03f487..814b5f2e3ed5 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -223,6 +223,8 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb) return htonl(0u); } + +void mptcp_active_detect_blackhole(struct sock *sk, bool expired); #else static inline void mptcp_init(void) @@ -307,6 +309,8 @@ static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct reques } static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } + +static inline void mptcp_active_detect_blackhole(struct sock *sk, bool expired) { } #endif /* CONFIG_MPTCP */ #if IS_ENABLED(CONFIG_MPTCP_IPV6) -- cgit v1.2.3 From 6513eb3d3191574b58859ef2d6dc26c0277c6f81 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 10 Sep 2024 17:35:35 -0400 Subject: net: tighten bad gso csum offset check in virtio_net_hdr The referenced commit drops bad input, but has false positives. Tighten the check to avoid these. The check detects illegal checksum offload requests, which produce csum_start/csum_off beyond end of packet after segmentation. But it is based on two incorrect assumptions: 1. virtio_net_hdr_to_skb with VIRTIO_NET_HDR_GSO_TCP[46] implies GSO. True in callers that inject into the tx path, such as tap. But false in callers that inject into rx, like virtio-net. Here, the flags indicate GRO, and CHECKSUM_UNNECESSARY or CHECKSUM_NONE without VIRTIO_NET_HDR_F_NEEDS_CSUM is normal. 2. TSO requires checksum offload, i.e., ip_summed == CHECKSUM_PARTIAL. False, as tcp[46]_gso_segment will fix up csum_start and offset for all other ip_summed by calling __tcp_v4_send_check. 
Because of 2, we can limit the scope of the fix to virtio_net_hdr that do try to set these fields, with a bogus value. Link: https://lore.kernel.org/netdev/20240909094527.GA3048202@port70.net/ Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr") Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240910213553.839926-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 6c395a2600e8..276ca543ef44 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -173,7 +173,8 @@ retry: break; case SKB_GSO_TCPV4: case SKB_GSO_TCPV6: - if (skb->csum_offset != offsetof(struct tcphdr, check)) + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb->csum_offset != offsetof(struct tcphdr, check)) return -EINVAL; break; } -- cgit v1.2.3 From 7c88f86576f382a5037f7acf2fce796ccafba4db Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:45 +0000 Subject: netdev: add netdev_rx_queue_restart() Add netdev_rx_queue_restart(), which resets an rx queue using the queue API recently merged[1]. The queue API was merged to enable the core net stack to reset individual rx queues to actuate changes in the rx queue's configuration. In later patches in this series, we will use netdev_rx_queue_restart() to reset rx queues after binding or unbinding dmabuf configuration, which will cause reallocation of the page_pool to repopulate its memory using the new configuration. [1] https://lore.kernel.org/netdev/20240430231420.699177-1-shailend@google.com/T/ Signed-off-by: David Wei Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index aa1716fb0e53..e78ca52d67fb 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -54,4 +54,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) return index; } #endif + +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); + #endif -- cgit v1.2.3 From 3efd7ab46d0aebc3e567a9846e79a98dbad3291c Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:46 +0000 Subject: net: netdev netlink api to bind dma-buf to a net device API takes the dma-buf fd as input, and binds it to the netdevice. The user can specify the rx queues to bind the dma-buf to. 
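To give a feel for the intended userspace flow, binding a dmabuf fd to a set of rx queues should look roughly like the following YNL cli invocation (the op and attribute names are inferred from the uapi below and the netdev YAML spec, so treat the exact spelling as an approximation):

	$ ./cli.py --spec ../netlink/specs/netdev.yaml \
	        --do bind-rx \
	        --json '{"ifindex": 3, "fd": 8, "queues": [{"type": "rx", "id": 0}]}'

Closing the netlink socket that issued bind-rx is what later unbinds the dmabuf again.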
Suggested-by: Stanislav Fomichev Signed-off-by: Mina Almasry Reviewed-by: Donald Hunter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 43742ac5b00d..91bf3ecc5f1d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -173,6 +173,16 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -186,6 +196,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 170aafe35cb98e0f3fbacb446ea86389fbce22ea Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:47 +0000 Subject: netdev: support binding dma-buf to netdevice Add a netdev_dmabuf_binding struct which represents the dma-buf-to-netdevice binding. The netlink API will bind the dma-buf to rx queues on the netdevice. On the binding, the dma_buf_attach & dma_buf_map_attachment will occur. The entries in the sg_table from mapping will be inserted into a genpool to make it ready for allocation. The chunks in the genpool are owned by a dmabuf_chunk_owner struct which holds the dma-buf offset of the base of the chunk and the dma_addr of the chunk. Both are needed to use allocations that come from this chunk. We create a new type that represents an allocation from the genpool: net_iov. We setup the net_iov allocation size in the genpool to PAGE_SIZE for simplicity: to match the PAGE_SIZE normally allocated by the page pool and given to the drivers. The user can unbind the dmabuf from the netdevice by closing the netlink socket that established the binding. We do this so that the binding is automatically unbound even if the userspace process crashes. The binding and unbinding leaves an indicator in struct netdev_rx_queue that the given queue is bound, and the binding is actuated by resetting the rx queue using the queue API. The netdev_dmabuf_binding struct is refcounted, and releases its resources only when all the refs are released. 
Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov # excluding netlink Acked-by: Daniel Vetter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/net/netdev_rx_queue.h | 2 ++ include/net/netmem.h | 8 ++++++++ include/net/page_pool/types.h | 6 ++++++ 4 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2465bdb6037f..e87b5e488325 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3953,6 +3953,8 @@ u8 dev_xdp_prog_count(struct net_device *dev); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); +u32 dev_get_min_mp_channel_count(const struct net_device *dev); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index e78ca52d67fb..ac34f5fb4f71 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -6,6 +6,7 @@ #include #include #include +#include /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { @@ -25,6 +26,7 @@ struct netdev_rx_queue { * Readers and writers must hold RTNL */ struct napi_struct *napi; + struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; /* diff --git a/include/net/netmem.h b/include/net/netmem.h index 46cc9b89ac79..c23e224dd6a0 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,6 +8,14 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +/* net_iov */ + +struct net_iov { + struct dmabuf_genpool_chunk_owner *owner; +}; + +/* netmem */ + /** * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 50569fed7868..4afd6dd56351 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -139,6 +139,10 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct pp_memory_provider_params { + void *mp_priv; +}; + struct page_pool { struct page_pool_params_fast p; @@ -197,6 +201,8 @@ struct page_pool { */ struct ptr_ring ring; + void *mp_priv; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; -- cgit v1.2.3 From 8ab79ed50cf10f338465c296012500de1081646f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:49 +0000 Subject: page_pool: devmem support Convert netmem to be a union of struct page and struct netmem. Overload the LSB of struct netmem* to indicate that it's a net_iov, otherwise it's a page. Currently these entries in struct page are rented by the page_pool and used exclusively by the net stack: struct { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; Mirror these (and only these) entries into struct net_iov and implement netmem helpers that can access these common fields regardless of whether the underlying type is page or net_iov. 
Implement checks for net_iov in netmem helpers which delegate to mm APIs, to ensure net_iov are never passed to the mm stack. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Acked-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 124 ++++++++++++++++++++++++++++++++++++--- include/net/page_pool/helpers.h | 39 +++--------- include/trace/events/page_pool.h | 12 ++-- 3 files changed, 130 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index c23e224dd6a0..8a6e20be4b9d 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,12 +8,52 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +#include +#include + /* net_iov */ +DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); + +/* We overload the LSB of the struct page pointer to indicate whether it's + * a page or net_iov. + */ +#define NET_IOV 0x01UL + struct net_iov { + unsigned long __unused_padding; + unsigned long pp_magic; + struct page_pool *pp; struct dmabuf_genpool_chunk_owner *owner; + unsigned long dma_addr; + atomic_long_t pp_ref_count; }; +/* These fields in struct page are used by the page_pool and net stack: + * + * struct { + * unsigned long pp_magic; + * struct page_pool *pp; + * unsigned long _pp_mapping_pad; + * unsigned long dma_addr; + * atomic_long_t pp_ref_count; + * }; + * + * We mirror the page_pool fields here so the page_pool can access these fields + * without worrying whether the underlying fields belong to a page or net_iov. + * + * The non-net stack fields of struct page are private to the mm stack and must + * never be mirrored to net_iov. + */ +#define NET_IOV_ASSERT_OFFSET(pg, iov) \ + static_assert(offsetof(struct page, pg) == \ + offsetof(struct net_iov, iov)) +NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); +NET_IOV_ASSERT_OFFSET(pp, pp); +NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); +NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); +#undef NET_IOV_ASSERT_OFFSET + /* netmem */ /** @@ -27,20 +67,37 @@ struct net_iov { */ typedef unsigned long __bitwise netmem_ref; +static inline bool netmem_is_net_iov(const netmem_ref netmem) +{ + return (__force unsigned long)netmem & NET_IOV; +} + /* This conversion fails (returns NULL) if the netmem_ref is not struct page * backed. - * - * Currently struct page is the only possible netmem, and this helper never - * fails. */ static inline struct page *netmem_to_page(netmem_ref netmem) { + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return NULL; + return (__force struct page *)netmem; } -/* Converting from page to netmem is always safe, because a page can always be - * a netmem. - */ +static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return (struct net_iov *)((__force unsigned long)netmem & + ~NET_IOV); + + DEBUG_NET_WARN_ON_ONCE(true); + return NULL; +} + +static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) +{ + return (__force netmem_ref)((unsigned long)niov | NET_IOV); +} + static inline netmem_ref page_to_netmem(struct page *page) { return (__force netmem_ref)page; @@ -48,17 +105,70 @@ static inline netmem_ref page_to_netmem(struct page *page) static inline int netmem_ref_count(netmem_ref netmem) { + /* The non-pp refcount of net_iov is always 1. On net_iov, we only + * support pp refcounting which uses the pp_ref_count field. 
+ */ + if (netmem_is_net_iov(netmem)) + return 1; + return page_ref_count(netmem_to_page(netmem)); } -static inline unsigned long netmem_to_pfn(netmem_ref netmem) +static inline unsigned long netmem_pfn_trace(netmem_ref netmem) { + if (netmem_is_net_iov(netmem)) + return 0; + return page_to_pfn(netmem_to_page(netmem)); } +static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) +{ + return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); +} + +static inline struct page_pool *netmem_get_pp(netmem_ref netmem) +{ + return __netmem_clear_lsb(netmem)->pp; +} + +static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) +{ + return &__netmem_clear_lsb(netmem)->pp_ref_count; +} + +static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) +{ + /* NUMA node preference only makes sense if we're allocating + * system memory. Memory providers (which give us net_iovs) + * choose for us. + */ + if (netmem_is_net_iov(netmem)) + return true; + + return page_to_nid(netmem_to_page(netmem)) == pref_nid; +} + static inline netmem_ref netmem_compound_head(netmem_ref netmem) { + /* niov are never compounded */ + if (netmem_is_net_iov(netmem)) + return netmem; + return page_to_netmem(compound_head(netmem_to_page(netmem))); } +static inline void *netmem_address(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return NULL; + + return page_address(netmem_to_page(netmem)); +} + +static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) +{ + return __netmem_clear_lsb(netmem)->dma_addr; +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 2b43a893c619..793e6fd78bc5 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -216,7 +216,7 @@ page_pool_get_dma_dir(const struct page_pool *pool) static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) { - atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr); + atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr); } /** @@ -244,7 +244,7 @@ static inline void page_pool_fragment_page(struct page *page, long nr) static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { - struct page *page = netmem_to_page(netmem); + atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining @@ -261,19 +261,19 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) * initially, and only overwrite it when the page is partitioned into * more than one piece. */ - if (atomic_long_read(&page->pp_ref_count) == nr) { + if (atomic_long_read(pp_ref_count) == nr) { /* As we have ensured nr is always one for constant case using * the BUILD_BUG_ON(), only need to handle the non-constant case * here for pp_ref_count draining, which is a rare case. */ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); if (!__builtin_constant_p(nr)) - atomic_long_set(&page->pp_ref_count, 1); + atomic_long_set(pp_ref_count, 1); return 0; } - ret = atomic_long_sub_return(nr, &page->pp_ref_count); + ret = atomic_long_sub_return(nr, pp_ref_count); WARN_ON(ret < 0); /* We are the last user here too, reset pp_ref_count back to 1 to @@ -282,7 +282,7 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) * page_pool_unref_page() currently. 
*/ if (unlikely(!ret)) - atomic_long_set(&page->pp_ref_count, 1); + atomic_long_set(pp_ref_count, 1); return ret; } @@ -401,9 +401,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) { - struct page *page = netmem_to_page(netmem); - - dma_addr_t ret = page->dma_addr; + dma_addr_t ret = netmem_get_dma_addr(netmem); if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) ret <<= PAGE_SHIFT; @@ -423,24 +421,6 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); } -static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, - dma_addr_t addr) -{ - struct page *page = netmem_to_page(netmem); - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { - page->dma_addr = addr >> PAGE_SHIFT; - - /* We assume page alignment to shave off bottom bits, - * if this "compression" doesn't work we need to drop. - */ - return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; - } - - page->dma_addr = addr; - return false; -} - /** * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW * @pool: &page_pool the @page belongs to @@ -463,11 +443,6 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, page_pool_get_dma_dir(pool)); } -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) -{ - return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); -} - static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 543e54e432a1..31825ed30032 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -57,12 +57,12 @@ TRACE_EVENT(page_pool_state_release, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u", __entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->release) + __entry->netmem & NET_IOV, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, @@ -83,12 +83,12 @@ TRACE_EVENT(page_pool_state_hold, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u", __entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->hold) + __entry->netmem & NET_IOV, __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, -- cgit v1.2.3 From 0f921404689398943257793f7240db239a23b609 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:50 +0000 Subject: memory-provider: dmabuf devmem memory provider Implement a memory provider that allocates dmabuf devmem in the form of net_iov. The provider receives a reference to the struct netdev_dmabuf_binding via the pool->mp_priv pointer. The driver needs to set this pointer for the provider in the net_iov. The provider obtains a reference on the netdev_dmabuf_binding which guarantees the binding and the underlying mapping remains alive until the provider is destroyed. 
Usage of PP_FLAG_DMA_MAP is required for this memory provide such that the page_pool can provide the driver with the dma-addrs of the devmem. Support for PP_FLAG_DMA_SYNC_DEV is omitted for simplicity & p.order != 0. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-7-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 4afd6dd56351..c022c410abe3 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -20,8 +20,18 @@ * device driver responsibility */ #define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */ + +/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting + * this must be able to support unreadable netmem, where netmem_address() would + * return NULL. This flag should not be set for header page_pools. + * + * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set + * page_pool_params.slow.queue_idx. + */ +#define PP_FLAG_ALLOW_UNREADABLE_NETMEM BIT(3) + #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ - PP_FLAG_SYSTEM_POOL) + PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) /* * Fast allocation side cache array/stack @@ -57,7 +67,9 @@ struct pp_alloc_cache { * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV * @slow: params with slowpath access only (initialization and Netlink) * @netdev: netdev this pool will serve (leave as NULL if none or multiple) - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL + * @queue_idx: queue idx this page_pool is being created for. + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL, + * PP_FLAG_ALLOW_UNREADABLE_NETMEM. */ struct page_pool_params { struct_group_tagged(page_pool_params_fast, fast, @@ -72,6 +84,7 @@ struct page_pool_params { ); struct_group_tagged(page_pool_params_slow, slow, struct net_device *netdev; + unsigned int queue_idx; unsigned int flags; /* private: used by test code only */ void (*init_callback)(netmem_ref netmem, void *arg); -- cgit v1.2.3 From 9f6b619edf2e85746f261b42ae8f818a59d126f7 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:51 +0000 Subject: net: support non paged skb frags Make skb_frag_page() fail in the case where the frag is not backed by a page, and fix its relevant callers to handle this case. Signed-off-by: Mina Almasry Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-8-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 42 +++++++++++++++++++++++++++++++++++++++++- include/linux/skbuff_ref.h | 9 ++++----- 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5803eb8a157d..1019f14583c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3524,21 +3524,58 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, fragto->offset = fragfrom->offset; } +/* Return: true if the skb_frag contains a net_iov. 
*/ +static inline bool skb_frag_is_net_iov(const skb_frag_t *frag) +{ + return netmem_is_net_iov(frag->netmem); +} + +/** + * skb_frag_net_iov - retrieve the net_iov referred to by fragment + * @frag: the fragment + * + * Return: the &struct net_iov associated with @frag. Returns NULL if this + * frag has no associated net_iov. + */ +static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) +{ + if (!skb_frag_is_net_iov(frag)) + return NULL; + + return netmem_to_net_iov(frag->netmem); +} + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * - * Returns the &struct page associated with @frag. + * Return: the &struct page associated with @frag. Returns NULL if this frag + * has no associated page. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { + if (skb_frag_is_net_iov(frag)) + return NULL; + return netmem_to_page(frag->netmem); } +/** + * skb_frag_netmem - retrieve the netmem referred to by a fragment + * @frag: the fragment + * + * Return: the &netmem_ref associated with @frag. + */ +static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) +{ + return frag->netmem; +} + int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); + /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer @@ -3548,6 +3585,9 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, */ static inline void *skb_frag_address(const skb_frag_t *frag) { + if (!skb_frag_page(frag)) + return NULL; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 16c241a23472..0f3c58007488 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -34,14 +34,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) bool napi_pp_put_page(netmem_ref netmem); -static inline void -skb_page_unref(struct page *page, bool recycle) +static inline void skb_page_unref(netmem_ref netmem, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page_to_netmem(page))) + if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(page); + put_page(netmem_to_page(netmem)); } /** @@ -54,7 +53,7 @@ skb_page_unref(struct page *page, bool recycle) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - skb_page_unref(skb_frag_page(frag), recycle); + skb_page_unref(skb_frag_netmem(frag), recycle); } /** -- cgit v1.2.3 From 65249feb6b3df9e17bab5911ee56fa7b0971e231 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:52 +0000 Subject: net: add support for skbs with unreadable frags For device memory TCP, we expect the skb headers to be available in host memory for access, and we expect the skb frags to be in device memory and unaccessible to the host. We expect there to be no mixing and matching of device memory frags (unaccessible) with host memory frags (accessible) in the same skb. Add a skb->devmem flag which indicates whether the frags in this skb are device memory frags or not. __skb_fill_netmem_desc() now checks frags added to skbs for net_iov, and marks the skb as skb->devmem accordingly. Add checks through the network stack to avoid accessing the frags of devmem skbs and avoid coalescing devmem skbs with non devmem skbs. 
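A minimal sketch of the pattern core paths are expected to follow (illustrative only, not lifted from the patch): anything that needs the payload in host memory checks the new helper first and backs off for devmem skbs.

	/* devmem frags cannot be linearized or copied into host memory */
	if (!skb_frags_readable(skb))
		return -EFAULT;

	err = skb_linearize(skb);
	if (err)
		return err;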
Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-9-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 19 +++++++++++++++++-- include/net/tcp.h | 3 ++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1019f14583c9..39f1d16f3628 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -827,6 +827,8 @@ enum skb_tstamp_type { * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @unreadable: indicates that at least 1 of the fragments in this skb is + * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -1008,7 +1010,7 @@ struct sk_buff { #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif - + __u8 unreadable:1; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1824,6 +1826,12 @@ static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) __skb_zcopy_downgrade_managed(skb); } +/* Return true if frags in this skb are readable by the host. */ +static inline bool skb_frags_readable(const struct sk_buff *skb) +{ + return !skb->unreadable; +} + static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; @@ -2540,10 +2548,17 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { - struct page *page = netmem_to_page(netmem); + struct page *page; __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); + if (netmem_is_net_iov(netmem)) { + skb->unreadable = true; + return; + } + + page = netmem_to_page(netmem); + /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). diff --git a/include/net/tcp.h b/include/net/tcp.h index 2aac11e7e1cc..f77f812bfbe7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1069,7 +1069,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && - skb_pure_zcopy_same(to, from)); + skb_pure_zcopy_same(to, from) && + skb_frags_readable(to) == skb_frags_readable(from)); } static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, -- cgit v1.2.3 From 8f0b3cc9a4c102c24808c87f1bc943659d7a7f9f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:53 +0000 Subject: tcp: RX path for devmem TCP In tcp_recvmsg_locked(), detect if the skb being received by the user is a devmem skb. In this case - if the user provided the MSG_SOCK_DEVMEM flag - pass it to tcp_recvmsg_devmem() for custom handling. tcp_recvmsg_devmem() copies any data in the skb header to the linear buffer, and returns a cmsg to the user indicating the number of bytes returned in the linear buffer. tcp_recvmsg_devmem() then loops over the unaccessible devmem skb frags, and returns to the user a cmsg_devmem indicating the location of the data in the dmabuf device memory. 
cmsg_devmem contains this information: 1. the offset into the dmabuf where the payload starts. 'frag_offset'. 2. the size of the frag. 'frag_size'. 3. an opaque token 'frag_token' to return to the kernel when the buffer is to be released. The pages awaiting freeing are stored in the newly added sk->sk_user_frags, and each page passed to userspace is get_page()'d. This reference is dropped once the userspace indicates that it is done reading this page. All pages are released when the socket is destroyed. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240910171458.219195-10-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 1 + include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 5 +++++ include/uapi/linux/uio.h | 13 +++++++++++++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..c58ca8dd561b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -337,6 +337,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_user_frags: xarray of pages the user is holding a reference on. */ struct sock { /* @@ -542,6 +543,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct xarray sk_user_frags; }; struct sock_bh_locked { diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..e993edc9c0ee 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,11 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index 059b1a9147f4..3a22ddae376a 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -20,6 +20,19 @@ struct iovec __kernel_size_t iov_len; /* Must be size_t (1003.1g) */ }; +struct dmabuf_cmsg { + __u64 frag_offset; /* offset into the dmabuf where the frag starts. + */ + __u32 frag_size; /* size of the frag. */ + __u32 frag_token; /* token representing this frag for + * DEVMEM_DONTNEED. + */ + __u32 dmabuf_id; /* dmabuf id this frag belongs to. */ + __u32 flags; /* Currently unused. Reserved for future + * uses. 
+ */ +}; + /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ -- cgit v1.2.3 From 678f6e28b5f6fc2316f2c0fed8f8903101f1e128 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:54 +0000 Subject: net: add SO_DEVMEM_DONTNEED setsockopt to release RX frags Add an interface for the user to notify the kernel that it is done reading the devmem dmabuf frags returned as cmsg. The kernel will drop the reference on the frags to make them available for reuse. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240910171458.219195-11-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/asm-generic/socket.h | 1 + include/uapi/linux/uio.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index e993edc9c0ee..3b4e3e815602 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -139,6 +139,7 @@ #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR #define SO_DEVMEM_DMABUF 79 #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF +#define SO_DEVMEM_DONTNEED 80 #if !defined(__KERNEL__) diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index 3a22ddae376a..649739e0c404 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -33,6 +33,11 @@ struct dmabuf_cmsg { */ }; +struct dmabuf_token { + __u32 token_start; + __u32 token_count; +}; + /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ -- cgit v1.2.3 From d0caf9876a1c9f844307effb598ad1312d9e0025 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:57 +0000 Subject: netdev: add dmabuf introspection Add dmabuf information to page_pool stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump page-pool-get ... {'dmabuf': 10, 'id': 456, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 455, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 454, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 453, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 452, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 451, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 450, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 449, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, And queue stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump queue-get ... 
{'dmabuf': 10, 'id': 8, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 9, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 10, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 11, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 12, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 13, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 14, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 15, 'ifindex': 3, 'type': 'rx'}, Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-14-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 91bf3ecc5f1d..7c308f04e7a0 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) -- cgit v1.2.3 From aa58bec064ab16224645776ce8e0af2fee46136a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:02 +0530 Subject: net: ethernet: oa_tc6: implement register write operation Implement register write operation according to the control communication specified in the OPEN Alliance 10BASE-T1x MACPHY Serial Interface document. Control write commands are used by the SPI host to write registers within the MAC-PHY. Each control write commands are composed of a 32 bits control command header followed by register write data. The MAC-PHY ignores the final 32 bits of data from the SPI host at the end of the control write command. The write command and data is also echoed from the MAC-PHY back to the SPI host to enable the SPI host to identify which register write failed in the case of any bus errors. Control write commands can write either a single register or multiple consecutive registers. When multiple consecutive registers are written, the address is automatically post-incremented by the MAC-PHY. Writing to any unimplemented or undefined registers shall be ignored and yield no effect. 
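A short usage sketch from a hypothetical MAC driver probe path (the register address and value are placeholders, and oa_tc6_init() is assumed to return NULL on failure):

	struct oa_tc6 *tc6;
	int ret;

	tc6 = oa_tc6_init(spi);
	if (!tc6)
		return -ENODEV;

	/* placeholder address/value; real drivers use the OPEN Alliance register map */
	ret = oa_tc6_write_register(tc6, 0x0004, 0x1);
	if (ret)
		return ret;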
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-3-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/oa_tc6.h (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h new file mode 100644 index 000000000000..99c490f1c8a8 --- /dev/null +++ b/include/linux/oa_tc6.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * OPEN Alliance 10BASE‑T1x MAC‑PHY Serial Interface framework + * + * Link: https://opensig.org/download/document/OPEN_Alliance_10BASET1x_MAC-PHY_Serial_Interface_V1.1.pdf + * + * Author: Parthiban Veerasooran + */ + +#include + +struct oa_tc6; + +struct oa_tc6 *oa_tc6_init(struct spi_device *spi); +int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); +int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); -- cgit v1.2.3 From 375d1e0278cca70ce801d52c6b95c7a3c00f249c Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:03 +0530 Subject: net: ethernet: oa_tc6: implement register read operation Implement register read operation according to the control communication specified in the OPEN Alliance 10BASE-T1x MACPHY Serial Interface document. Control read commands are used by the SPI host to read registers within the MAC-PHY. Each control read commands are composed of a 32 bits control command header. The MAC-PHY ignores all data from the SPI host following the control header for the remainder of the control read command. Control read commands can read either a single register or multiple consecutive registers. When multiple consecutive registers are read, the address is automatically post-incremented by the MAC-PHY. Reading any unimplemented or undefined registers shall return zero. Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-4-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 99c490f1c8a8..85aeecf87306 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -15,3 +15,6 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi); int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); +int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); +int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); -- cgit v1.2.3 From 8f9bf857e43b3f75a098e3af3a6fec2d03203a1e Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:06 +0530 Subject: net: ethernet: oa_tc6: implement internal PHY initialization Internal PHY is initialized as per the PHY register capability supported by the MAC-PHY. Direct PHY Register Access Capability indicates if PHY registers are directly accessible within the SPI register memory space. Indirect PHY Register Access Capability indicates if PHY registers are indirectly accessible through the MDIO/MDC registers MDIOACCn defined in OPEN Alliance specification. Currently the direct register access is only supported. 
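Since the framework now takes over the internal PHY, a hypothetical driver would pass its net_device at init time and tear the framework down on remove, roughly as follows (error handling trimmed, structure names assumed):

	/* probe */
	priv->tc6 = oa_tc6_init(spi, netdev);
	if (!priv->tc6)
		return -ENODEV;

	/* remove */
	oa_tc6_exit(priv->tc6);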
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-7-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 4 +++- include/uapi/linux/mdio.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 85aeecf87306..606ba9f1e663 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -7,11 +7,13 @@ * Author: Parthiban Veerasooran */ +#include #include struct oa_tc6; -struct oa_tc6 *oa_tc6_init(struct spi_device *spi); +struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev); +void oa_tc6_exit(struct oa_tc6 *tc6); int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c0c8ec995b06..f0d3f268240d 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -23,6 +23,7 @@ #define MDIO_MMD_DTEXS 5 /* DTE Extender Sublayer */ #define MDIO_MMD_TC 6 /* Transmission Convergence */ #define MDIO_MMD_AN 7 /* Auto-Negotiation */ +#define MDIO_MMD_POWER_UNIT 13 /* PHY Power Unit */ #define MDIO_MMD_C22EXT 29 /* Clause 22 extension */ #define MDIO_MMD_VEND1 30 /* Vendor specific 1 */ #define MDIO_MMD_VEND2 31 /* Vendor specific 2 */ -- cgit v1.2.3 From 53fbde8ab21e8c2c6187159cc17fc10cbf20900a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:09 +0530 Subject: net: ethernet: oa_tc6: implement transmit path to transfer tx ethernet frames The transmit ethernet frame will be converted into multiple transmit data chunks. Each transmit data chunk consists of a 4 bytes header followed by a 64 bytes transmit data chunk payload. The 4 bytes data header occurs at the beginning of each transmit data chunk on MOSI. The data header contains the information needed to determine the validity and location of the transmit frame data within the data chunk payload. The number of transmit data chunks transmitted to mac-phy is limited to the number transmit credits available in the mac-phy. Initially the transmit credits will be updated from the buffer status register and then it will be updated from the footer received on each spi data transfer. The received footer will be examined for the transmit errors if any. 
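For context, a sketch of how a MAC driver's ndo_start_xmit is expected to hand frames to the framework (the private structure and its tc6 member are hypothetical):

	static netdev_tx_t my_macphy_start_xmit(struct sk_buff *skb,
						struct net_device *netdev)
	{
		struct my_macphy_priv *priv = netdev_priv(netdev);

		return oa_tc6_start_xmit(priv->tc6, skb);
	}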
Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-10-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 606ba9f1e663..5c7811ac9cbe 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -20,3 +20,4 @@ int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); +netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb); -- cgit v1.2.3 From afd42170c8a6e68edd3f3a7f2aacd2bfbedb58b2 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 9 Sep 2024 13:55:12 +0530 Subject: net: ethernet: oa_tc6: add helper function to enable zero align rx frame Zero align receive frame feature can be enabled to align all receive ethernet frames data to start at the beginning of any receive data chunk payload with a start word offset (SWO) of zero. Receive frames may begin anywhere within the receive data chunk payload when this feature is not enabled. Reviewed-by: Andrew Lunn Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20240909082514.262942-13-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- include/linux/oa_tc6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h index 5c7811ac9cbe..15f58e3c56c7 100644 --- a/include/linux/oa_tc6.h +++ b/include/linux/oa_tc6.h @@ -21,3 +21,4 @@ int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], u8 length); netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb); +int oa_tc6_zero_align_receive_frame_enable(struct oa_tc6 *tc6); -- cgit v1.2.3 From 5e9b50dea970ae6d3e1309d4254157099734a2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:59 +0200 Subject: fs: add f_pipe Only regular files with FMODE_ATOMIC_POS and directories need f_pos_lock. Place a new f_pipe member in a union with f_pos_lock that they can use and make them stop abusing f_version in follow-up patches. 
Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-18-6d3e4816aa7b@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e6b3c1afb31..ca4925008244 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1001,6 +1001,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_path: path of the file * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes * @f_pos: file position * @f_version: file version * @f_security: LSM security context of this file @@ -1026,7 +1027,12 @@ struct file { const struct cred *f_cred; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; - struct mutex f_pos_lock; + union { + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; + }; loff_t f_pos; u64 f_version; /* --- cacheline 2 boundary (128 bytes) --- */ -- cgit v1.2.3 From 11068e0b64cbb540b96e577fcca0926242ecaf58 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:05:01 +0200 Subject: fs: remove f_version Now that detecting concurrent seeks is done by the filesystems that require it we can remove f_version and free up 8 bytes for future extensions. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-20-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ca4925008244..7e11ce172140 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1003,7 +1003,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position - * @f_version: file version * @f_security: LSM security context of this file * @f_owner: file owner * @f_wb_err: writeback error @@ -1034,11 +1033,10 @@ struct file { u64 f_pipe; }; loff_t f_pos; - u64 f_version; - /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY void *f_security; #endif + /* --- cacheline 2 boundary (128 bytes) --- */ struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; -- cgit v1.2.3 From 2077006d4725c82c6e9612cec3a6c140921b067f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 10 Sep 2024 10:16:39 +0200 Subject: uidgid: make sure we fit into one cacheline When I expanded uidgid mappings I intended for a struct uid_gid_map to fit into a single cacheline on x86 as they tend to be pretty performance sensitive (idmapped mounts etc). But a 4 byte hole was added that brought it over 64 bytes. Fix that by adding the static extent array and the extent counter into a substruct. C's type punning for unions guarantees that we can access ->nr_extents even if the last written to member wasn't within the same object. This is also what we rely on in struct_group() and friends. This of course relies on non-strict aliasing which we don't do. 
99) If the member used to read the contents of a union object is not the same as the member last used to store a value in the object, the appropriate part of the object representation of the value is reinterpreted as an object representation in the new type as described in 6.2.6 (a process sometimes called "type punning"). Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf Link: https://lore.kernel.org/r/20240910-work-uid_gid_map-v1-1-e6bc761363ed@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6030a8235617..3625096d5f85 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -21,9 +21,11 @@ struct uid_gid_extent { }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ - u32 nr_extents; union { - struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + struct { + struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + u32 nr_extents; + }; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; -- cgit v1.2.3 From db0aa2e9566fda2d23dc8f6c102856ead95578a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2024 00:20:42 +0100 Subject: mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. 
(2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells cc: Matthew Wilcox cc: Jeff Layton cc: Steve French cc: Ilya Dryomov cc: Gao Xiang cc: Mike Marshall cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 138 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/iov_iter.h | 57 ++++++++++++++++++ include/linux/uio.h | 12 ++++ 3 files changed, 207 insertions(+) create mode 100644 include/linux/folio_queue.h (limited to 'include') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h new file mode 100644 index 000000000000..52773613bf23 --- /dev/null +++ b/include/linux/folio_queue.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Queue of folios definitions + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_FOLIO_QUEUE_H +#define _LINUX_FOLIO_QUEUE_H + +#include + +/* + * Segment in a queue of running buffers. Each segment can hold a number of + * folios and a portion of the queue can be referenced with the ITER_FOLIOQ + * iterator. The possibility exists of inserting non-folio elements into the + * queue (such as gaps). + * + * Explicit prev and next pointers are used instead of a list_head to make it + * easier to add segments to tail and remove them from the head without the + * need for a lock. 
+ */ +struct folio_queue { + struct folio_batch vec; /* Folios in the queue segment */ + u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ + struct folio_queue *next; /* Next queue segment or NULL */ + struct folio_queue *prev; /* Previous queue segment of NULL */ + unsigned long marks; /* 1-bit mark per folio */ + unsigned long marks2; /* Second 1-bit mark per folio */ +#if PAGEVEC_SIZE > BITS_PER_LONG +#error marks is not big enough +#endif +}; + +static inline void folioq_init(struct folio_queue *folioq) +{ + folio_batch_init(&folioq->vec); + folioq->next = NULL; + folioq->prev = NULL; + folioq->marks = 0; + folioq->marks2 = 0; +} + +static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) +{ + return PAGEVEC_SIZE; +} + +static inline unsigned int folioq_count(struct folio_queue *folioq) +{ + return folio_batch_count(&folioq->vec); +} + +static inline bool folioq_full(struct folio_queue *folioq) +{ + //return !folio_batch_space(&folioq->vec); + return folioq_count(folioq) >= folioq_nr_slots(folioq); +} + +static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks); +} + +static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks); +} + +static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks); +} + +static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks2); +} + +static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks2); +} + +static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks2); +} + +static inline unsigned int __folio_order(struct folio *folio) +{ + if (!folio_test_large(folio)) + return 0; + return folio->_flags_1 & 0xff; +} + +static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + return slot; +} + +static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + folioq_mark(folioq, slot); + return slot; +} + +static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->vec.folios[slot]; +} + +static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->orders[slot]; +} + +static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) +{ + return PAGE_SIZE << folioq_folio_order(folioq, slot); +} + +static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) +{ + folioq->vec.folios[slot] = NULL; + folioq_unmark(folioq, slot); + folioq_unmark2(folioq, slot); +} + +#endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index 270454a6703d..a223370a59a7 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -10,6 +10,7 @@ #include #include +#include typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, void *priv, void *priv2); @@ -140,6 +141,60 @@ size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, return 
progress; } +/* + * Handle ITER_FOLIOQ. + */ +static __always_inline +size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct folio_queue *folioq = iter->folioq; + unsigned int slot = iter->folioq_slot; + size_t progress = 0, skip = iter->iov_offset; + + if (slot == folioq_nr_slots(folioq)) { + /* The iterator may have been extended. */ + folioq = folioq->next; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t part, remain, consumed; + size_t fsize; + void *base; + + if (!folio) + break; + + fsize = folioq_folio_size(folioq, slot); + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= fsize) { + skip = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + if (remain) + break; + } while (len); + + iter->folioq_slot = slot; + iter->folioq = folioq; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + /* * Handle ITER_XARRAY. */ @@ -249,6 +304,8 @@ size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, return iterate_bvec(iter, len, priv, priv2, step); if (iov_iter_is_kvec(iter)) return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); if (iov_iter_is_xarray(iter)) return iterate_xarray(iter, len, priv, priv2, step); return iterate_discard(iter, len, priv, priv2, step); diff --git a/include/linux/uio.h b/include/linux/uio.h index 7020adedfa08..845d110acadc 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include struct page; +struct folio_queue; typedef unsigned int __bitwise iov_iter_extraction_t; @@ -25,6 +26,7 @@ enum iter_type { ITER_IOVEC, ITER_BVEC, ITER_KVEC, + ITER_FOLIOQ, ITER_XARRAY, ITER_DISCARD, }; @@ -66,6 +68,7 @@ struct iov_iter { const struct iovec *__iov; const struct kvec *kvec; const struct bio_vec *bvec; + const struct folio_queue *folioq; struct xarray *xarray; void __user *ubuf; }; @@ -74,6 +77,7 @@ struct iov_iter { }; union { unsigned long nr_segs; + u8 folioq_slot; loff_t xarray_start; }; }; @@ -126,6 +130,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i) return iov_iter_type(i) == ITER_DISCARD; } +static inline bool iov_iter_is_folioq(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_FOLIOQ; +} + static inline bool iov_iter_is_xarray(const struct iov_iter *i) { return iov_iter_type(i) == ITER_XARRAY; @@ -273,6 +282,9 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); +void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, + unsigned int first_slot, unsigned int offset, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, -- cgit v1.2.3 From 197a3de607d92b3d72e69edf5470e0a8fae548cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 14 Aug 2024 16:14:21 +0100 Subject: 
iov_iter: Provide copy_folio_from_iter() Provide a copy_folio_from_iter() wrapper. Signed-off-by: David Howells cc: Alexander Viro cc: Christian Brauner cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20240814203850.2240469-14-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/uio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 845d110acadc..853f9de5aa05 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -189,6 +189,12 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, return copy_page_to_iter(&folio->page, offset, bytes, i); } +static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter(&folio->page, offset, bytes, i); +} + static inline size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { -- cgit v1.2.3 From cd0277ed0c188dd40e7744e89299af7b78831ca4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 May 2024 21:47:07 +0100 Subject: netfs: Use new folio_queue data type and iterator instead of xarray iter Make the netfs write-side routines use the new folio_queue struct to hold a rolling buffer of folios, with the issuer adding folios at the tail and the collector removing them from the head as they're processed instead of using an xarray. This will allow a subsequent patch to simplify the write collector. The primary mark (as tested by folioq_is_marked()) is used to note if the corresponding folio needs putting. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-16-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 8 ++++---- include/trace/events/netfs.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 63ab2c775a21..f7b444f4f251 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -38,10 +38,6 @@ static inline void folio_start_private_2(struct folio *folio) folio_set_private_2(folio); } -/* Marks used on xarray-based buffers */ -#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ -#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ - enum netfs_io_source { NETFS_SOURCE_UNKNOWN, NETFS_FILL_WITH_ZEROES, @@ -233,6 +229,8 @@ struct netfs_io_request { struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ + struct folio_queue *buffer; /* Head of I/O buffer */ + struct folio_queue *buffer_tail; /* Tail of I/O buffer */ struct iov_iter iter; /* Unencrypted-side iterator */ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ @@ -254,6 +252,8 @@ struct netfs_io_request { short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ + u8 buffer_head_slot; /* First slot in ->buffer */ + u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ 
atomic64_t issued_to; /* Write issuer folio cursor */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 47cd11aaccac..4e13774a06e6 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_folio_trace_mkwrite, "mkwrite") \ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ + EM(netfs_folio_trace_put, "put") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ -- cgit v1.2.3 From 983cdcf8fe141b0ce16bc71959a5dc55bcb0764d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Jun 2024 07:48:55 +0100 Subject: netfs: Simplify the writeback code Use the new folio_queue structures to simplify the writeback code. The problem with referring to the i_pages xarray directly is that we may have gaps in the sequence of folios we're writing from that we need to skip when we're removing the writeback mark from the folios we're writing back from. At the moment the code tries to deal with this by carefully tracking the gaps in each writeback stream (eg. write to server and write to cache) and divining when there's a gap that spans folios (something that's not helped by folios not being a consistent size). Instead, the folio_queue buffer contains pointers only to the folios we're dealing with, has them in ascending order and indicates a gap by placing non-consecutive folios next to each other. This makes it possible to track where we need to clean up to by just keeping track of where we've processed to on each stream and taking the minimum. Note that the I/O iterator is always rounded up to the end of the folio, even if that is beyond the EOF position, so that the cache can do DIO from the page. The excess space is cleared, though mmapped writes clobber it.
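The gap convention described here can be consumed with nothing more than the folioq_*() accessors added earlier in this series. The sketch below is illustrative only (the function name is made up and the real netfs collector does considerably more); it assumes the <linux/folio_queue.h> from the earlier patch plus the usual mm helpers, and simply reports wherever the folio index sequence is discontiguous:

#include <linux/folio_queue.h>
#include <linux/mm.h>
#include <linux/printk.h>

/* Sketch: walk a folio_queue chain and report index gaps (not netfs code). */
static void report_buffer_gaps(struct folio_queue *fq)
{
	pgoff_t expected = 0;
	bool first = true;

	for (; fq; fq = fq->next) {
		unsigned int slot;

		for (slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);

			if (!folio)
				continue;
			if (!first && folio->index != expected)
				pr_debug("gap before folio index %lx\n", folio->index);
			expected = folio->index + folio_nr_pages(folio);
			first = false;
		}
	}
}

With the folios guaranteed to be in ascending order, the collector only has to remember how far each stream has processed and take the minimum, as described above.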
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-18-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - include/trace/events/netfs.h | 33 ++------------------------------- 2 files changed, 2 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index f7b444f4f251..bd0e3d147822 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -257,7 +257,6 @@ struct netfs_io_request { unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ - unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 4e13774a06e6..58bf23002fc1 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -512,33 +512,6 @@ TRACE_EVENT(netfs_collect, __entry->start + __entry->len) ); -TRACE_EVENT(netfs_collect_contig, - TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to, - enum netfs_collect_contig_trace type), - - TP_ARGS(wreq, to, type), - - TP_STRUCT__entry( - __field(unsigned int, wreq) - __field(enum netfs_collect_contig_trace, type) - __field(unsigned long long, contiguity) - __field(unsigned long long, to) - ), - - TP_fast_assign( - __entry->wreq = wreq->debug_id; - __entry->type = type; - __entry->contiguity = wreq->contiguity; - __entry->to = to; - ), - - TP_printk("R=%08x %llx -> %llx %s", - __entry->wreq, - __entry->contiguity, - __entry->to, - __print_symbolic(__entry->type, netfs_collect_contig_traces)) - ); - TRACE_EVENT(netfs_collect_sreq, TP_PROTO(const struct netfs_io_request *wreq, const struct netfs_io_subrequest *subreq), @@ -610,7 +583,6 @@ TRACE_EVENT(netfs_collect_state, __field(unsigned int, notes ) __field(unsigned long long, collected_to ) __field(unsigned long long, cleaned_to ) - __field(unsigned long long, contiguity ) ), TP_fast_assign( @@ -618,12 +590,11 @@ TRACE_EVENT(netfs_collect_state, __entry->notes = notes; __entry->collected_to = collected_to; __entry->cleaned_to = wreq->cleaned_to; - __entry->contiguity = wreq->contiguity; ), - TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x", + TP_printk("R=%08x col=%llx cln=%llx n=%x", __entry->wreq, __entry->collected_to, - __entry->cleaned_to, __entry->contiguity, + __entry->cleaned_to, __entry->notes) ); -- cgit v1.2.3 From ee4cdf7ba857a894ad1650d6ab77669cbbfa329e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2024 00:40:22 +0100 Subject: netfs: Speed up buffered reading Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processes simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. 
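A very rough sketch of the donation bookkeeping in point (3), using the prev_donated/next_donated fields this patch adds to struct netfs_io_subrequest in the hunk below; the helper name is invented and the real collector also handles locking, ordering and the request-level fallback:

#include <linux/netfs.h>

/*
 * Sketch only: hand the unaligned residue of a collected subrequest to a
 * neighbour so that only whole folios get unlocked here.
 */
static void donate_residue(struct netfs_io_subrequest *prev,
			   struct netfs_io_subrequest *next,
			   size_t residue)
{
	if (!residue)
		return;
	if (next)
		next->prev_donated += residue;	/* treated as data next already has */
	else if (prev)
		prev->next_donated += residue;	/* no successor: donate backwards */
}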
Notes: (*) Readahead expansion is massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 18 ++++++++ include/linux/netfs.h | 26 +++++++---- include/trace/events/netfs.h | 103 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 134 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 52773613bf23..955680c3bb5f 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -27,6 +27,7 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ + unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; + folioq->marks3 = 0; } static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) @@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } +static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks3); +} + +static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks3); +} + +static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks3); +} + static inline unsigned int __folio_order(struct folio *folio) { if (!folio_test_large(folio)) @@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); + folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index bd0e3d147822..c0f0c9c87d86 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -178,36 +178,43 @@ struct netfs_io_subrequest { unsigned long long start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ + size_t consumed; /* Amount of read data consumed */ + size_t prev_donated; /* Amount of data donated from previous subreq */ + size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ unsigned int nr_segs; /* Number of segs in io_iter */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ + unsigned char curr_folioq_slot; /* Folio currently being read */ 
+ unsigned char curr_folio_order; /* Order of folio */ + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ +#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */ #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ -#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ + NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_WRITE, /* This is a direct I/O write */ + NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */ nr__netfs_io_origin } __mode(byte); @@ -224,6 +231,7 @@ struct netfs_io_request { struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ @@ -244,12 +252,10 @@ struct netfs_io_request { unsigned int nr_group_rel; /* Number of refs to release on ->group */ spinlock_t lock; /* Lock for queuing subreqs */ atomic_t nr_outstanding; /* Number of ops in progress */ - atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ - size_t upper_len; /* Length can be extended to here */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ - short error; /* 0 or error that occurred */ + long error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ u8 buffer_head_slot; /* First slot in ->buffer */ @@ -260,9 +266,9 @@ struct netfs_io_request { unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + size_t prev_donated; /* Fallback for subreq->prev_donated */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache 
*/ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ @@ -274,6 +280,7 @@ struct netfs_io_request { #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ +#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; @@ -292,7 +299,7 @@ struct netfs_request_ops { /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); - bool (*clamp_length)(struct netfs_io_subrequest *subreq); + int (*prepare_read)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, @@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, + bool was_async); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, + int error, bool was_async); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 58bf23002fc1..7b26463cb98f 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -20,6 +20,7 @@ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readpage, "READPAGE ") \ + EM(netfs_read_trace_read_gaps, "READ-GAPS") \ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") @@ -33,12 +34,14 @@ #define netfs_rreq_origins \ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ + EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_FOR_WRITE, "RW") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - E_(NETFS_DIO_WRITE, "DW") + EM(NETFS_DIO_WRITE, "DW") \ + E_(NETFS_PGPRIV2_COPY_TO_CACHE, "2C") #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ @@ -69,15 +72,25 @@ E_(NETFS_INVALID_WRITE, "INVL") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_add_donations, "+DON ") \ + EM(netfs_sreq_trace_added, "ADD ") \ + EM(netfs_sreq_trace_clear, "CLEAR") \ EM(netfs_sreq_trace_discard, "DSCRD") \ + EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ + EM(netfs_sreq_trace_donate_to_next, "DON-N") \ EM(netfs_sreq_trace_download_instead, "RDOWN") \ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_hit_eof, "EOF ") \ + EM(netfs_sreq_trace_io_progress, "IO ") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \ - EM(netfs_sreq_trace_resubmit_short, "SHORT") \ + EM(netfs_sreq_trace_progress, "PRGRS") \ + EM(netfs_sreq_trace_reprep_failed, "REPFL") \ EM(netfs_sreq_trace_retry, "RETRY") \ + EM(netfs_sreq_trace_short, "SHORT") \ + EM(netfs_sreq_trace_split, 
"SPLIT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_write, "WRITE") \ @@ -118,7 +131,7 @@ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ - EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ + EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \ EM(netfs_sreq_trace_put_done, "PUT DONE ") \ EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ @@ -138,6 +151,7 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ + EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ @@ -154,7 +168,11 @@ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ EM(netfs_folio_trace_put, "put") \ + EM(netfs_folio_trace_read, "read") \ + EM(netfs_folio_trace_read_done, "read-done") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ + EM(netfs_folio_trace_read_put, "read-put") \ + EM(netfs_folio_trace_read_unlock, "read-unlock") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ EM(netfs_folio_trace_store_copy, "store-copy") \ @@ -167,6 +185,12 @@ EM(netfs_contig_trace_jump, "-->JUMP-->") \ E_(netfs_contig_trace_unlock, "Unlock") +#define netfs_donate_traces \ + EM(netfs_trace_donate_tail_to_prev, "tail-to-prev") \ + EM(netfs_trace_donate_to_prev, "to-prev") \ + EM(netfs_trace_donate_to_next, "to-next") \ + E_(netfs_trace_donate_to_deferred_next, "defer-next") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -184,6 +208,7 @@ enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); enum netfs_folio_trace { netfs_folio_traces } __mode(byte); enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); +enum netfs_donate_trace { netfs_donate_traces } __mode(byte); #endif @@ -206,6 +231,7 @@ netfs_rreq_ref_traces; netfs_sreq_ref_traces; netfs_folio_traces; netfs_collect_contig_traces; +netfs_donate_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -226,6 +252,7 @@ TRACE_EVENT(netfs_read, TP_STRUCT__entry( __field(unsigned int, rreq ) __field(unsigned int, cookie ) + __field(loff_t, i_size ) __field(loff_t, start ) __field(size_t, len ) __field(enum netfs_read_trace, what ) @@ -235,18 +262,19 @@ TRACE_EVENT(netfs_read, TP_fast_assign( __entry->rreq = rreq->debug_id; __entry->cookie = rreq->cache_resources.debug_id; + __entry->i_size = rreq->i_size; __entry->start = start; __entry->len = len; __entry->what = what; __entry->netfs_inode = rreq->inode->i_ino; ), - TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", + TP_printk("R=%08x %s c=%08x ni=%x s=%llx l=%zx sz=%llx", __entry->rreq, __print_symbolic(__entry->what, netfs_read_traces), __entry->cookie, __entry->netfs_inode, - __entry->start, __entry->len) + __entry->start, __entry->len, __entry->i_size) ); TRACE_EVENT(netfs_rreq, @@ -651,6 +679,71 @@ TRACE_EVENT(netfs_collect_stream, __entry->collected_to, __entry->front) ); +TRACE_EVENT(netfs_progress, + TP_PROTO(const struct netfs_io_subrequest *subreq, + unsigned long long start, size_t avail, size_t part), + + TP_ARGS(subreq, start, avail, part), + + TP_STRUCT__entry( + 
__field(unsigned int, rreq) + __field(unsigned int, subreq) + __field(unsigned int, consumed) + __field(unsigned int, transferred) + __field(unsigned long long, f_start) + __field(unsigned int, f_avail) + __field(unsigned int, f_part) + __field(unsigned char, slot) + ), + + TP_fast_assign( + __entry->rreq = subreq->rreq->debug_id; + __entry->subreq = subreq->debug_index; + __entry->consumed = subreq->consumed; + __entry->transferred = subreq->transferred; + __entry->f_start = start; + __entry->f_avail = avail; + __entry->f_part = part; + __entry->slot = subreq->curr_folioq_slot; + ), + + TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x", + __entry->rreq, __entry->subreq, __entry->f_start, + __entry->consumed, __entry->transferred, + __entry->f_part, __entry->f_avail, __entry->slot) + ); + +TRACE_EVENT(netfs_donate, + TP_PROTO(const struct netfs_io_request *rreq, + const struct netfs_io_subrequest *from, + const struct netfs_io_subrequest *to, + size_t amount, + enum netfs_donate_trace trace), + + TP_ARGS(rreq, from, to, amount, trace), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, from) + __field(unsigned int, to) + __field(unsigned int, amount) + __field(enum netfs_donate_trace, trace) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->from = from->debug_index; + __entry->to = to ? to->debug_index : -1; + __entry->amount = amount; + __entry->trace = trace; + ), + + TP_printk("R=%08x[%02x] -> [%02x] %s am=%x", + __entry->rreq, __entry->from, __entry->to, + __print_symbolic(__entry->trace, netfs_donate_traces), + __entry->amount) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ -- cgit v1.2.3 From c4f1450ecccc5311db87f806998eda1c824c4e35 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Jul 2024 12:44:30 +0100 Subject: cachefiles, netfs: Fix write to partial block at EOF Because it uses DIO writes, cachefiles is unable to make a write to the backing file if that write is not aligned to and sized according to the backing file's DIO block alignment. This makes it tricky to handle a write to the cache where the EOF on the network file is not correctly aligned. To get around this, netfslib attempts to tell the driver it is calling how much more data there is available beyond the EOF that it can use to pad the write (netfslib preclears the part of the folio above the EOF). However, it tries to tell the cache what the maximum length is, but doesn't calculate this correctly; and, in any case, cachefiles actually ignores the value and just skips the block. Fix this by: (1) Change the value passed to indicate the amount of extra data that can be added to the operation (now ->submit_extendable_to). This is much simpler to calculate as it's just the end of the folio minus the top of the data within the folio - rather than having to account for data spread over multiple folios. (2) Make cachefiles add some of this data if the subrequest it is given ends at the network file's i_size if the extra data is sufficient to pad out to a whole block. 
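In other words, the renamed ->submit_extendable_to in the hunk below is simply "end of the folio minus end of the data within the folio", and the cache can consume it when a write ends exactly at i_size. A hedged sketch of that decision follows; the function and parameter names are placeholders, not the actual cachefiles code:

#include <linux/math.h>

/*
 * Sketch: round a write that ends exactly at i_size up to a whole DIO
 * block, but only if the precleared space past EOF (extendable_to)
 * covers the padding.
 */
static size_t sketch_pad_write_len(unsigned long long start, size_t len,
				   unsigned long long i_size,
				   size_t extendable_to, size_t dio_block_size)
{
	if (start + len == i_size) {
		size_t padded = round_up(len, dio_block_size);

		if (padded - len <= extendable_to)
			return padded;
	}
	return len;
}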
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-22-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c0f0c9c87d86..5eaceef41e6c 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -135,7 +135,7 @@ struct netfs_io_stream { unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ unsigned int submit_off; /* Folio offset we're submitting from */ unsigned int submit_len; /* Amount of data left to submit */ - unsigned int submit_max_len; /* Amount I/O can be rounded up to */ + unsigned int submit_extendable_to; /* Amount I/O can be rounded up to */ void (*prepare_write)(struct netfs_io_subrequest *subreq); void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ -- cgit v1.2.3 From 8f246b7c0a1be0882374f2ff831a61f0dbe77678 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jul 2024 12:23:11 +0100 Subject: netfs: Cancel dirty folios that have no storage destination Kafs wants to be able to cache the contents of directories (and symlinks), but whilst these are downloaded from the server with the FS.FetchData RPC op and similar, the same as for regular files, they can't be updated by FS.StoreData, but rather have special operations (FS.MakeDir, etc.). Now, rather than redownloading a directory's content after each change made to that directory, kafs modifies the local blob. This blob can be saved out to the cache, and since it's using netfslib, kafs just marks the folios dirty and lets ->writepages() on the directory take care of it, as for a regular file. This is fine as long as there's a cache: although the upload stream is disabled, there's a cache stream to drive the procedure. But if the cache goes away in the meantime, suddenly there's no way to do any writes and the code gets confused, complains "R=%x: No submit" to dmesg and leaves the dirty folio hanging. Fix this by just cancelling the store of the folio if neither stream is active. (If there's no cache at the time of dirtying, we should just not mark the folio dirty).
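The fix boils down to the write path treating "no active upload stream and no active cache stream" as a reason to cancel the store rather than as an error. A simplified sketch, assuming the per-request io_streams[] array and (an assumption here) per-stream ->active flags, plus the netfs_folio_trace_cancel_store tracepoint added below; the real folio cleanup in netfslib is more involved:

#include <linux/netfs.h>
#include <trace/events/netfs.h>

/* Sketch: give up on storing a dirty folio when no stream can take it. */
static bool sketch_cancel_store(struct netfs_io_request *wreq, struct folio *folio)
{
	if (wreq->io_streams[0].active || wreq->io_streams[1].active)
		return false;		/* some stream can still take the data */

	/* Nothing can store this folio: trace it and let the caller clean up. */
	trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
	return true;
}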
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-23-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 7b26463cb98f..76bd42a96815 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ + EM(netfs_folio_trace_cancel_store, "cancel-store") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ EM(netfs_folio_trace_clear_g, "clear-g") \ -- cgit v1.2.3 From 2982c8c19bab020e38da9d503aa21a3b389c53ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Jul 2024 20:03:07 +0100 Subject: cifs: Use iterate_and_advance*() routines directly for hashing Replace the bespoke cifs iterators of ITER_BVEC and ITER_KVEC to do hashing with iterate_and_advance_kernel() - a variant on iterate_and_advance() that only supports kernel-internal ITER_* types and not UBUF/IOVEC types. The bespoke ITER_XARRAY is left because we don't really want to be calling crypto_shash_update() under the RCU read lock for large amounts of data; besides, ITER_XARRAY is going to be phased out. Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Tom Talpey cc: Enzo Matsumiya cc: linux-cifs@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-24-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/iov_iter.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index a223370a59a7..c4aa58032faf 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -328,4 +328,51 @@ size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, return iterate_and_advance2(iter, len, priv, NULL, ustep, step); } +/** + * iterate_and_advance_kernel - Iterate over a kernel-internal iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type + * iterators; it will not handle UBUF or IOVEC-type iterators. + * + * A step functions, @step, must be provided, one for handling mapped kernel + * addresses and the other is given user addresses which have the potential to + * fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 
0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + #endif /* _LINUX_IOV_ITER_H */ -- cgit v1.2.3 From dfa1a7f456f10018229d0d5b3c36dd36a9b5344f Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Thu, 12 Sep 2024 20:03:06 +0800 Subject: ASoC: SOF: Intel: hda: remove common_hdmi_codec_drv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not set common_hdmi_codec_drv in SOF platform driver since no machine driver needs it. Remove member variable common_hdmi_codec_drv from snd_soc_acpi_mach_params structure. Signed-off-by: Brent Lu Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20240912120308.134762-6-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index c1552bc6e31c..60d3b86a4660 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -62,7 +62,6 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @platform: string used for HDAudio codec support * @codec_mask: used for HDAudio support * @dmic_num: number of SoC- or chipset-attached PDM digital microphones - * @common_hdmi_codec_drv: use commom HDAudio HDMI codec driver * @link_mask: SoundWire links enabled on the board * @links: array of SoundWire link _ADR descriptors, null terminated * @i2s_link_mask: I2S/TDM links enabled on the board @@ -80,7 +79,6 @@ struct snd_soc_acpi_mach_params { const char *platform; u32 codec_mask; u32 dmic_num; - bool common_hdmi_codec_drv; u32 link_mask; const struct snd_soc_acpi_link_adr *links; u32 i2s_link_mask; -- cgit v1.2.3 From c45b9a07b6392fa224ca76b89f24dae1046eef09 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:34 +0900 Subject: Revert "firewire: core: use mutex to coordinate concurrent calls to flush completions" This reverts commit d9605d67562505e27dcc0f71af418118d3db91e5, since this commit is on the following reverted changes. 
Link: https://lore.kernel.org/r/20240912133038.238786-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 19e8c5f9537c..f815d12deda0 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -512,7 +512,6 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; struct work_struct work; - struct mutex flushing_completions_mutex; int type; int channel; int speed; -- cgit v1.2.3 From 4010cb1efda08ec6fd02ec5db9da909322ef352e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:37 +0900 Subject: firewire: core: update documentation of kernel APIs for flushing completions There is a slight difference between fw_iso_context_flush_completions() and fw_iso_context_schedule_flush_completions(). This commit updates the documentations for them. Link: https://lore.kernel.org/r/20240912133038.238786-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f815d12deda0..b632eec3ab52 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -537,9 +537,11 @@ int fw_iso_context_flush_completions(struct fw_iso_context *ctx); * @ctx: the isochronous context * * Schedule a work item on workqueue to process the isochronous context. The registered callback - * function is called in the worker if some packets have been already transferred since the last - * time. If it is required to process the context in the current context, - * fw_iso_context_flush_completions() is available instead. + * function is called by the worker when a queued packet buffer with the interrupt flag is + * completed, either after transmission in the IT context or after being filled in the IR context. + * The callback function is also called when the header buffer in the context becomes full, If it + * is required to process the context in the current context, fw_iso_context_flush_completions() is + * available instead. * * Context: Any context. */ -- cgit v1.2.3 From f1cba5212e252243a539e079813bc96fbf53e241 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:38 +0900 Subject: firewire: core: rename cause flag of tracepoints event The flag of FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ directly causes hardIRQ request by 1394 OHCI hardware when the corresponding isochronous packet is transferred, however it is not so directly associated to hardIRQ processing itself. This commit renames the flag so that it relates to interrupt parameter of internal packet data. 
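For driver code, the practical difference documented above is which context the completion processing runs in: fw_iso_context_flush_completions() runs the callbacks synchronously in the caller's context, while the schedule variant only queues the context's work item and may be called from any context. A minimal, hypothetical usage sketch (the function and flag names in the caller are made up):

#include <linux/firewire.h>

/* Illustrative only: pick the flush variant according to calling context. */
static void finish_iso_stream(struct fw_iso_context *ctx, bool may_sleep)
{
	if (may_sleep)
		fw_iso_context_flush_completions(ctx);		/* run callbacks now */
	else
		fw_iso_context_schedule_flush_completions(ctx);	/* just queue the work item */
}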
Link: https://lore.kernel.org/r/20240912133038.238786-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b108176deb22..ad0e0cf82b9c 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -830,13 +830,13 @@ TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue, #ifndef show_cause enum fw_iso_context_completions_cause { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0, - FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, }; #define show_cause(cause) \ __print_symbolic(cause, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" }, \ - { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" }, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, "INTERRUPT" }, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" } \ ) #endif -- cgit v1.2.3 From ede5bbe488d162bcd572880e58f9044c9df84050 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:27 +0300 Subject: ARM: ep93xx: add regmap aux_dev The following driver's should be instantiated by ep93xx syscon driver: - reboot - pinctrl - clock They all require access to DEVCFG register with a shared lock held, to avoid conflict writing to swlocked parts of DEVCFG. Provide common resources such as base, regmap and spinlock via auxiliary bus framework. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Linus Walleij Reviewed-by: Stephen Boyd Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 56fbe2dc59b1..a27447971302 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -3,6 +3,18 @@ #define _SOC_EP93XX_H struct platform_device; +struct regmap; +struct spinlock_t; + +enum ep93xx_soc_model { + EP93XX_9301_SOC, + EP93XX_9307_SOC, + EP93XX_9312_SOC, +}; + +#include +#include +#include #define EP93XX_CHIP_REV_D0 3 #define EP93XX_CHIP_REV_D1 4 @@ -10,6 +22,20 @@ struct platform_device; #define EP93XX_CHIP_REV_E1 6 #define EP93XX_CHIP_REV_E2 7 +struct ep93xx_regmap_adev { + struct auxiliary_device adev; + struct regmap *map; + void __iomem *base; + spinlock_t *lock; + void (*write)(struct regmap *map, spinlock_t *lock, unsigned int reg, + unsigned int val); + void (*update_bits)(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int mask, unsigned int val); +}; + +#define to_ep93xx_regmap_adev(_adev) \ + container_of((_adev), struct ep93xx_regmap_adev, adev) + #ifdef CONFIG_ARCH_EP93XX int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); -- cgit v1.2.3 From eeb3dd5b32e68989bae1a3d4420204cf3a90ce73 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:31 +0300 Subject: dt-bindings: soc: Add Cirrus EP93xx Add device tree bindings for the Cirrus Logic EP93xx SoC. 
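For the syscon clients listed above (reboot, pinctrl, clock), consuming the ep93xx_regmap_adev is just a matter of converting the auxiliary_device they are probed with back into the wrapper and using the shared regmap, with DEVCFG-style updates going through the provided helpers. A hedged sketch of such a client; the driver, register offset and mask are placeholders, not one of the real ep93xx drivers:

#include <linux/auxiliary_bus.h>
#include <linux/regmap.h>
#include <linux/soc/cirrus/ep93xx.h>

/* Sketch of an auxiliary-bus client probe. */
static int ep93xx_client_probe(struct auxiliary_device *adev,
			       const struct auxiliary_device_id *id)
{
	struct ep93xx_regmap_adev *rdev = to_ep93xx_regmap_adev(adev);
	unsigned int val;
	int ret;

	/* Plain reads can go straight through the shared regmap... */
	ret = regmap_read(rdev->map, 0x00, &val);
	if (ret)
		return ret;

	/* ...while updates go through the provided helpers, which take the
	 * shared spinlock so concurrent clients don't race on DEVCFG. */
	rdev->update_bits(rdev->map, rdev->lock, 0x00, 0x1, 0x1);
	return 0;
}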
Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Reviewed-by: Guenter Roeck Acked-by: Miquel Raynal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/dt-bindings/clock/cirrus,ep9301-syscon.h | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/dt-bindings/clock/cirrus,ep9301-syscon.h (limited to 'include') diff --git a/include/dt-bindings/clock/cirrus,ep9301-syscon.h b/include/dt-bindings/clock/cirrus,ep9301-syscon.h new file mode 100644 index 000000000000..6bb8f532e7d0 --- /dev/null +++ b/include/dt-bindings/clock/cirrus,ep9301-syscon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +#ifndef DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H +#define DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H + +#define EP93XX_CLK_PLL1 0 +#define EP93XX_CLK_PLL2 1 + +#define EP93XX_CLK_FCLK 2 +#define EP93XX_CLK_HCLK 3 +#define EP93XX_CLK_PCLK 4 + +#define EP93XX_CLK_UART 5 +#define EP93XX_CLK_SPI 6 +#define EP93XX_CLK_PWM 7 +#define EP93XX_CLK_USB 8 + +#define EP93XX_CLK_M2M0 9 +#define EP93XX_CLK_M2M1 10 + +#define EP93XX_CLK_M2P0 11 +#define EP93XX_CLK_M2P1 12 +#define EP93XX_CLK_M2P2 13 +#define EP93XX_CLK_M2P3 14 +#define EP93XX_CLK_M2P4 15 +#define EP93XX_CLK_M2P5 16 +#define EP93XX_CLK_M2P6 17 +#define EP93XX_CLK_M2P7 18 +#define EP93XX_CLK_M2P8 19 +#define EP93XX_CLK_M2P9 20 + +#define EP93XX_CLK_UART1 21 +#define EP93XX_CLK_UART2 22 +#define EP93XX_CLK_UART3 23 + +#define EP93XX_CLK_ADC 24 +#define EP93XX_CLK_ADC_EN 25 + +#define EP93XX_CLK_KEYPAD 26 + +#define EP93XX_CLK_VIDEO 27 + +#define EP93XX_CLK_I2S_MCLK 28 +#define EP93XX_CLK_I2S_SCLK 29 +#define EP93XX_CLK_I2S_LRCLK 30 + +#endif /* DT_BINDINGS_CIRRUS_EP93XX_CLOCK_H */ -- cgit v1.2.3 From 2e7f55ce430240a5547b8a94b4c532fc8c20b18b Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:34 +0300 Subject: dmaengine: cirrus: Convert to DT for Cirrus EP93xx Convert Cirrus EP93xx DMA to device tree usage: - add OF ID match table with data - add of_probe for device tree - add xlate for m2m/m2p - drop subsys_initcall code - drop platform probe - drop platform structs usage >From now on it only supports device tree probing. Co-developed-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-ep93xx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h index eb9805bb3fe8..9ec5cdd5a1eb 100644 --- a/include/linux/platform_data/dma-ep93xx.h +++ b/include/linux/platform_data/dma-ep93xx.h @@ -3,8 +3,11 @@ #define __ASM_ARCH_DMA_H #include +#include #include #include +#include +#include /* * M2P channels. 
@@ -70,6 +73,9 @@ struct ep93xx_dma_platform_data { static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) { + if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) + return true; + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); } -- cgit v1.2.3 From b3ab5787e7acb02874ae86cbe13969d4dd01a585 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:48 +0300 Subject: input: keypad: ep93xx: add DT support for Cirrus EP93xx - drop flags, they were not used anyway - add OF ID match table - process "autorepeat", "debounce-delay-ms", prescale from device tree - drop platform data usage and it's header - keymap goes from device tree now on Signed-off-by: Nikita Shubin Acked-by: Dmitry Torokhov Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index a27447971302..8942bfaf1545 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -41,8 +41,6 @@ int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev); -void ep93xx_keypad_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); @@ -52,8 +50,6 @@ static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_keypad_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } -- cgit v1.2.3 From a48ac3dc569771c18fcafbc8351d820cc343c54a Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:58 +0300 Subject: pwm: ep93xx: drop legacy pinctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop legacy gpio request/free since we are using pinctrl for this now. 
Signed-off-by: Nikita Shubin Acked-by: Uwe Kleine-König Acked-by: Thierry Reding Acked-by: Linus Walleij Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 8942bfaf1545..f6376edc1b33 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,8 +37,6 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); -void ep93xx_pwm_release_gpio(struct platform_device *pdev); int ep93xx_ide_acquire_gpio(struct platform_device *pdev); void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); @@ -46,8 +44,6 @@ void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } -- cgit v1.2.3 From a632229be268dde8f6d407638b5cfba8b78201d6 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:10:59 +0300 Subject: ata: pata_ep93xx: remove legacy pinctrl use Drop legacy acquire/release since we are using pinctrl for this now. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Reviewed-by: Sergey Shtylyov Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Reviewed-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andy Shevchenko Acked-by: Damien Le Moal Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/soc/cirrus/ep93xx.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index f6376edc1b33..142c33a2d7db 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -37,15 +37,11 @@ struct ep93xx_regmap_adev { container_of((_adev), struct ep93xx_regmap_adev, adev) #ifdef CONFIG_ARCH_EP93XX -int ep93xx_ide_acquire_gpio(struct platform_device *pdev); -void ep93xx_ide_release_gpio(struct platform_device *pdev); int ep93xx_i2s_acquire(void); void ep93xx_i2s_release(void); unsigned int ep93xx_chip_revision(void); #else -static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} static inline int ep93xx_i2s_acquire(void) { return 0; } static inline void ep93xx_i2s_release(void) {} static inline unsigned int ep93xx_chip_revision(void) { return 0; } -- cgit v1.2.3 From e5ef574dda702e62081d5c7991949cbdb7f65c08 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:00 +0300 Subject: ARM: ep93xx: delete all boardfiles Delete the ep93xx board files. 
Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/spi-ep93xx.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/spi-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h deleted file mode 100644 index b439f2a896e0..000000000000 --- a/include/linux/platform_data/spi-ep93xx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_EP93XX_SPI_H -#define __ASM_MACH_EP93XX_SPI_H - -struct spi_device; - -/** - * struct ep93xx_spi_info - EP93xx specific SPI descriptor - * @use_dma: use DMA for the transfers - */ -struct ep93xx_spi_info { - bool use_dma; -}; - -#endif /* __ASM_MACH_EP93XX_SPI_H */ -- cgit v1.2.3 From 43528a72526152f17a918973b3b8b6f383b90f98 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:01 +0300 Subject: ARM: ep93xx: soc: drop defines Remove unnecessary defines, as we dropped board files. Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Acked-by: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/eth-ep93xx.h | 10 --------- include/linux/platform_data/keypad-ep93xx.h | 32 ----------------------------- include/linux/soc/cirrus/ep93xx.h | 13 ------------ 3 files changed, 55 deletions(-) delete mode 100644 include/linux/platform_data/eth-ep93xx.h delete mode 100644 include/linux/platform_data/keypad-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/eth-ep93xx.h b/include/linux/platform_data/eth-ep93xx.h deleted file mode 100644 index 8eef637a804d..000000000000 --- a/include/linux/platform_data/eth-ep93xx.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PLATFORM_DATA_ETH_EP93XX -#define _LINUX_PLATFORM_DATA_ETH_EP93XX - -struct ep93xx_eth_data { - unsigned char dev_addr[6]; - unsigned char phy_id; -}; - -#endif diff --git a/include/linux/platform_data/keypad-ep93xx.h b/include/linux/platform_data/keypad-ep93xx.h deleted file mode 100644 index 3054fced8509..000000000000 --- a/include/linux/platform_data/keypad-ep93xx.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KEYPAD_EP93XX_H -#define __KEYPAD_EP93XX_H - -struct matrix_keymap_data; - -/* flags for the ep93xx_keypad driver */ -#define EP93XX_KEYPAD_DISABLE_3_KEY (1<<0) /* disable 3-key reset */ -#define EP93XX_KEYPAD_DIAG_MODE (1<<1) /* diagnostic mode */ -#define EP93XX_KEYPAD_BACK_DRIVE (1<<2) /* back driving mode */ -#define EP93XX_KEYPAD_TEST_MODE (1<<3) /* scan only column 0 */ -#define EP93XX_KEYPAD_AUTOREPEAT (1<<4) /* enable key autorepeat */ - -/** - * struct ep93xx_keypad_platform_data - platform specific device structure - * @keymap_data: pointer to &matrix_keymap_data - * @debounce: debounce start count; terminal count is 0xff - * @prescale: row/column counter pre-scaler load value - * @flags: see above - */ -struct ep93xx_keypad_platform_data { - struct matrix_keymap_data *keymap_data; - unsigned int debounce; - unsigned int prescale; - unsigned int flags; - unsigned int clk_rate; -}; - -#define EP93XX_MATRIX_ROWS (8) -#define EP93XX_MATRIX_COLS (8) - -#endif /* __KEYPAD_EP93XX_H */ diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 142c33a2d7db..3e6cf2b25a97 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -2,7 +2,6 @@ #ifndef _SOC_EP93XX_H 
#define _SOC_EP93XX_H -struct platform_device; struct regmap; struct spinlock_t; @@ -36,16 +35,4 @@ struct ep93xx_regmap_adev { #define to_ep93xx_regmap_adev(_adev) \ container_of((_adev), struct ep93xx_regmap_adev, adev) -#ifdef CONFIG_ARCH_EP93XX -int ep93xx_i2s_acquire(void); -void ep93xx_i2s_release(void); -unsigned int ep93xx_chip_revision(void); - -#else -static inline int ep93xx_i2s_acquire(void) { return 0; } -static inline void ep93xx_i2s_release(void) {} -static inline unsigned int ep93xx_chip_revision(void) { return 0; } - -#endif - #endif -- cgit v1.2.3 From a015b1828653b591de0aa5303c0dbc4235935f94 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 9 Sep 2024 11:11:03 +0300 Subject: dmaengine: cirrus: remove platform code Remove DMA platform header, from now on we use device tree for DMA clients. Acked-by: Vinod Koul Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-ep93xx.h | 100 ------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 include/linux/platform_data/dma-ep93xx.h (limited to 'include') diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h deleted file mode 100644 index 9ec5cdd5a1eb..000000000000 --- a/include/linux/platform_data/dma-ep93xx.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_DMA_H -#define __ASM_ARCH_DMA_H - -#include -#include -#include -#include -#include -#include - -/* - * M2P channels. - * - * Note that these values are also directly used for setting the PPALLOC - * register. - */ -#define EP93XX_DMA_I2S1 0 -#define EP93XX_DMA_I2S2 1 -#define EP93XX_DMA_AAC1 2 -#define EP93XX_DMA_AAC2 3 -#define EP93XX_DMA_AAC3 4 -#define EP93XX_DMA_I2S3 5 -#define EP93XX_DMA_UART1 6 -#define EP93XX_DMA_UART2 7 -#define EP93XX_DMA_UART3 8 -#define EP93XX_DMA_IRDA 9 -/* M2M channels */ -#define EP93XX_DMA_SSP 10 -#define EP93XX_DMA_IDE 11 - -/** - * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine - * @port: peripheral which is requesting the channel - * @direction: TX/RX channel - * @name: optional name for the channel, this is displayed in /proc/interrupts - * - * This information is passed as private channel parameter in a filter - * function. Note that this is only needed for slave/cyclic channels. For - * memcpy channels %NULL data should be passed. - */ -struct ep93xx_dma_data { - int port; - enum dma_transfer_direction direction; - const char *name; -}; - -/** - * struct ep93xx_dma_chan_data - platform specific data for a DMA channel - * @name: name of the channel, used for getting the right clock for the channel - * @base: mapped registers - * @irq: interrupt number used by this channel - */ -struct ep93xx_dma_chan_data { - const char *name; - void __iomem *base; - int irq; -}; - -/** - * struct ep93xx_dma_platform_data - platform data for the dmaengine driver - * @channels: array of channels which are passed to the driver - * @num_channels: number of channels in the array - * - * This structure is passed to the DMA engine driver via platform data. For - * M2P channels, contract is that even channels are for TX and odd for RX. - * There is no requirement for the M2M channels. 
- */ -struct ep93xx_dma_platform_data { - struct ep93xx_dma_chan_data *channels; - size_t num_channels; -}; - -static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) -{ - if (device_is_compatible(chan->device->dev, "cirrus,ep9301-dma-m2p")) - return true; - - return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); -} - -/** - * ep93xx_dma_chan_direction - returns direction the channel can be used - * @chan: channel - * - * This function can be used in filter functions to find out whether the - * channel supports given DMA direction. Only M2P channels have such - * limitation, for M2M channels the direction is configurable. - */ -static inline enum dma_transfer_direction -ep93xx_dma_chan_direction(struct dma_chan *chan) -{ - if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_TRANS_NONE; - - /* even channels are for TX, odd for RX */ - return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; -} - -#endif /* __ASM_ARCH_DMA_H */ -- cgit v1.2.3 From 8d8081cecfb9940beeb4a8a700db34e615a96056 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 15:35:46 -0700 Subject: cxl: Move mailbox related bits to the same context Create a new 'struct cxl_mailbox' and move all mailbox related bits to it. This allows isolation of all CXL mailbox data in order to export some of the calls to external kernel callers and avoid exporting of CXL driver specific bits such has device states. The allocation of 'struct cxl_mailbox' is also split out with cxl_mailbox_init() so the mailbox can be created independently. Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Fan Ni Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240905223711.1990186-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/cxl/mailbox.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/cxl/mailbox.h (limited to 'include') diff --git a/include/cxl/mailbox.h b/include/cxl/mailbox.h new file mode 100644 index 000000000000..bacd111e75f1 --- /dev/null +++ b/include/cxl/mailbox.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation. */ +#ifndef __CXL_MBOX_H__ +#define __CXL_MBOX_H__ +#include + +struct cxl_mbox_cmd; + +/** + * struct cxl_mailbox - context for CXL mailbox operations + * @host: device that hosts the mailbox + * @payload_size: Size of space for payload + * (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register) + * @mbox_mutex: mutex protects device mailbox and firmware + * @mbox_wait: rcuwait for mailbox + * @mbox_send: @dev specific transport for transmitting mailbox commands + */ +struct cxl_mailbox { + struct device *host; + size_t payload_size; + struct mutex mbox_mutex; /* lock to protect mailbox context */ + struct rcuwait mbox_wait; + int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd); +}; + +int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host); + +#endif -- cgit v1.2.3 From 7cc2a6eadcd7a5aa36ac63e6659f5c6138c7f4d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Sep 2024 13:56:08 -0600 Subject: io_uring: add IORING_REGISTER_COPY_BUFFERS method Buffers can get registered with io_uring, which allows to skip the repeated pin_pages, unpin/unref pages for each O_DIRECT operation. This reduces the overhead of O_DIRECT IO. However, registrering buffers can take some time. 
Normally this isn't an issue as it's done at initialization time (and hence less critical), but for cases where rings can be created and destroyed as part of an IO thread pool, registering the same buffers for multiple rings become a more time sensitive proposition. As an example, let's say an application has an IO memory pool of 500G. Initial registration takes: Got 500 huge pages (each 1024MB) Registered 500 pages in 409 msec or about 0.4 seconds. If we go higher to 900 1GB huge pages being registered: Registered 900 pages in 738 msec which is, as expected, a fully linear scaling. Rather than have each ring pin/map/register the same buffer pool, provide an io_uring_register(2) opcode to simply duplicate the buffers that are registered with another ring. Adding the same 900GB of registered buffers to the target ring can then be accomplished in: Copied 900 pages in 17 usec While timing differs a bit, this provides around a 25,000-40,000x speedup for this use case. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a275f91d2ac0..9dc5bb428c8a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -609,6 +609,9 @@ enum io_uring_register_op { IORING_REGISTER_CLOCK = 29, + /* copy registered buffers from source ring to current ring */ + IORING_REGISTER_COPY_BUFFERS = 30, + /* this goes last */ IORING_REGISTER_LAST, @@ -694,6 +697,16 @@ struct io_uring_clock_register { __u32 __resv[3]; }; +enum { + IORING_REGISTER_SRC_REGISTERED = 1, +}; + +struct io_uring_copy_buffers { + __u32 src_fd; + __u32 flags; + __u32 pad[6]; +}; + struct io_uring_buf { __u64 addr; __u32 len; -- cgit v1.2.3 From 433d7ce2d86d21274838c9e8c796f4232cd13cdb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 6 Aug 2024 15:38:12 -0700 Subject: security,bpf: constify struct path in bpf_token_create() LSM hook There is no reason why struct path pointer shouldn't be const-qualified when being passed into bpf_token_create() LSM hook. Add that const. 
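As a rough sketch (not part of this patch), an LSM implementation of the hook simply gains the const qualifier; the function name below is hypothetical:

    /* illustrative hook body: 'path' is now read-only */
    static int example_bpf_token_create(struct bpf_token *token,
                                        union bpf_attr *attr,
                                        const struct path *path)
    {
            /* inspecting path->dentry or path->mnt is fine; writes
             * through 'path' no longer compile */
            return 0;
    }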
Acked-by: Paul Moore (LSM/SELinux) Suggested-by: Al Viro Signed-off-by: Andrii Nakryiko --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 855db460e08b..462b55378241 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -431,7 +431,7 @@ LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) diff --git a/include/linux/security.h b/include/linux/security.h index 1390f1efb4f0..31523a2c71c4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2137,7 +2137,7 @@ extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); + const struct path *path); extern void security_bpf_token_free(struct bpf_token *token); extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); extern int security_bpf_token_capable(const struct bpf_token *token, int cap); @@ -2177,7 +2177,7 @@ static inline void security_bpf_prog_free(struct bpf_prog *prog) { } static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return 0; } -- cgit v1.2.3 From b2155807893aac40f1a1cdf43f7fcc270cbfc05a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Sep 2024 17:21:42 -0700 Subject: uapi: libc-compat: remove ipx leftovers The uAPI headers for IPX were deleted 3 years ago in commit 6c9b40844751 ("net: Remove net/ipx.h and uapi/linux/ipx.h header files") Delete the leftover defines from libc-compat.h Link: https://patch.msgid.link/20240911002142.1508694-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/libc-compat.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..0eca95ccb41e 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -140,25 +140,6 @@ #endif /* _NETINET_IN_H */ -/* Coordinate with glibc netipx/ipx.h header. 
*/ -#if defined(__NETIPX_IPX_H) - -#define __UAPI_DEF_SOCKADDR_IPX 0 -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0 -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0 -#define __UAPI_DEF_IPX_CONFIG_DATA 0 -#define __UAPI_DEF_IPX_ROUTE_DEF 0 - -#else /* defined(__NETIPX_IPX_H) */ - -#define __UAPI_DEF_SOCKADDR_IPX 1 -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 -#define __UAPI_DEF_IPX_CONFIG_DATA 1 -#define __UAPI_DEF_IPX_ROUTE_DEF 1 - -#endif /* defined(__NETIPX_IPX_H) */ - /* Definitions for xattr.h */ #if defined(_SYS_XATTR_H) #define __UAPI_DEF_XATTR 0 @@ -240,23 +221,6 @@ #define __UAPI_DEF_IP6_MTUINFO 1 #endif -/* Definitions for ipx.h */ -#ifndef __UAPI_DEF_SOCKADDR_IPX -#define __UAPI_DEF_SOCKADDR_IPX 1 -#endif -#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION -#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 -#endif -#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION -#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 -#endif -#ifndef __UAPI_DEF_IPX_CONFIG_DATA -#define __UAPI_DEF_IPX_CONFIG_DATA 1 -#endif -#ifndef __UAPI_DEF_IPX_ROUTE_DEF -#define __UAPI_DEF_IPX_ROUTE_DEF 1 -#endif - /* Definitions for xattr.h */ #ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 -- cgit v1.2.3 From da2f660b3ba1be33310452959ab72d1d7ce39350 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 11 Sep 2024 13:17:46 -0700 Subject: net/mlx5: fs, make get_root_namespace API function As preparation for HW Steering support, where the function get_root_namespace() is needed to get root FDB, make it an API function and rename it to mlx5_get_root_namespace(). Reviewed-by: Yevgeny Kliteynik Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20240911201757.1505453-5-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 3fb428ce7d1c..b744e554f014 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -342,4 +342,7 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, struct mlx5_pkt_reformat *reformat); u32 mlx5_flow_table_id(struct mlx5_flow_table *ft); + +struct mlx5_flow_root_namespace * +mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type); #endif -- cgit v1.2.3 From 9947204cdad97d22d171039019a4aad4d6899cdd Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 11 Sep 2024 13:17:51 -0700 Subject: net/mlx5: Add device cap for supporting hot reset in sync reset flow New devices with new FW can support sync reset for firmware activate using hot reset. Add capability for supporting it and add MFRL field to query from FW which type of PCI reset method to use while handling sync reset events. 
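A hedged sketch (not part of the patch) of how a driver could consume the new capability bit and MFRL field; the helper name and the mfrl_out query buffer are assumptions:

    static bool mlx5_fw_reset_uses_hot_reset(struct mlx5_core_dev *dev,
                                             void *mfrl_out)
    {
            if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method))
                    return false;

            return MLX5_GET(mfrl_reg, mfrl_out, pci_reset_req_method) ==
                   MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET;
    }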
Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240911201757.1505453-10-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2045575b70d4..620a5c305123 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1856,7 +1856,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x6]; + u8 reserved_at_330[0x5]; + u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; u8 vport_counter_local_loopback[0x1]; @@ -11188,6 +11189,11 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[][0x20]; }; +enum { + MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE = 0, + MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET = 1, +}; + enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, @@ -11215,7 +11221,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x4]; + u8 pci_reset_req_method[0x3]; + u8 reserved_at_2b[0x1]; u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; -- cgit v1.2.3 From 5bd877093fd0b2e9e5f0c03b466669f761b5849c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 11 Sep 2024 13:17:55 -0700 Subject: net/mlx5: Add NOT_READY command return status Add a new command status MLX5_CMD_STAT_NOT_READY to handle cases where the firmware is not ready. Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20240911201757.1505453-14-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a94bc9e3af96..d0f7d1f36c5e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1449,6 +1449,7 @@ enum { MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_NOT_READY = 0x7, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, -- cgit v1.2.3 From 52fa3b6532ec6f3a1e39bf869b304d3560dd983b Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 13 Sep 2024 03:28:24 +0000 Subject: memory-provider: fix compilation issue without SYSFS When CONFIG_SYSFS is not set, the kernel fails to compile: net/core/page_pool_user.c:368:45: error: implicit declaration of function 'get_netdev_rx_queue_index' [-Werror=implicit-function-declaration] 368 | if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~ When CONFIG_SYSFS is not set, get_netdev_rx_queue_index() is not defined as well. Fix by removing the ifdef around get_netdev_rx_queue_index(). It is not needed anymore after commit e817f85652c1 ("xdp: generic XDP handling of xdp_rxq_info") removed most of the CONFIG_SYSFS ifdefs. 
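A minimal sketch (not from the patch) showing that callers now build the same way with or without CONFIG_SYSFS; the function name is illustrative:

    static unsigned int example_queue_index(struct net_device *dev,
                                            unsigned int i)
    {
            struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, i);

            /* previously only available under CONFIG_SYSFS */
            return get_netdev_rx_queue_index(rxq);
    }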
Fixes: 0f9214046893 ("memory-provider: dmabuf devmem memory provider") Cc: Matthieu Baerts (NGI0) Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20240913032824.2117095-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index ac34f5fb4f71..596836abf7bf 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -45,7 +45,6 @@ __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) return dev->_rx + rxq; } -#ifdef CONFIG_SYSFS static inline unsigned int get_netdev_rx_queue_index(struct netdev_rx_queue *queue) { @@ -55,7 +54,6 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) BUG_ON(index >= dev->num_rx_queues); return index; } -#endif int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); -- cgit v1.2.3 From 907936b6f4e630718cc31ddea79cc76a3e32080a Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 9 Sep 2024 13:05:04 +0300 Subject: net/mlx5: Handle memory scheme ODP capabilities When running over new FW that supports the new memory scheme ODP, set the cap in the FW to signal the FW we are working in the new scheme. In the memory scheme ODP the per_transport_service capabilities are RO for the driver so we skip their setting. Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909100504.29797-9-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 154095256d0d..57c9b18c3adb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1389,9 +1389,13 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) -#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ - MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ - transport_page_fault_scheme_cap.cap) +#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ + (MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + mem_page_fault) ? \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + memory_page_fault_scheme_cap.cap) : \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + transport_page_fault_scheme_cap.cap)) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) -- cgit v1.2.3 From 8d159eb2117b2e3697a31785662b653938f007cb Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:23 +0300 Subject: RDMA/mlx5: Use IB set_netdev and get_netdev functions The IB layer provides a common interface to store and get net devices associated to an IB device port (ib_device_set_netdev() and ib_device_get_netdev()). Previously, mlx5_ib stored and managed the associated net devices internally. Replace internal net device management in mlx5_ib with ib_device_set_netdev() when attaching/detaching a net device and ib_device_get_netdev() when retrieving the net device. Export ib_device_get_netdev(). For mlx5 representors/PFs/VFs and lag creation we replace the netdev assignments with the IB set/get netdev functions. In active-backup mode lag the active slave net device is stored in the lag itself. 
To assure the net device stored in a lag bond IB device is the active slave we implement the following: - mlx5_core: when modifying the slave of a bond we send the internal driver event MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE. - mlx5_ib: when catching the event call ib_device_set_netdev() This patch also ensures the correct IB events are sent in switchdev lag. While at it, when in multiport eswitch mode, only a single IB device is created for all ports. The said IB device will receive all netdev events of its VFs once loaded, thus to avoid overwriting the mapping of PF IB device to PF netdev, ignore NETDEV_REGISTER events if the ib device has already been mapped to a netdev. Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-6-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/driver.h | 2 +- include/rdma/ib_verbs.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 57c9b18c3adb..dac33cfe9c0c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -371,6 +371,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, MLX5_DRIVER_EVENT_AFFILIATION_DONE, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a96438ded15f..46a7a3d11048 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -643,6 +643,7 @@ struct mlx5_priv { struct mlx5_sf_hw_table *sf_hw_table; struct mlx5_sf_table *sf_table; #endif + struct blocking_notifier_head lag_nh; }; enum mlx5_device_state { @@ -1181,7 +1182,6 @@ bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); -struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1dcf812d787..aa8ede439905 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4453,6 +4453,8 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + u32 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); -- cgit v1.2.3 From 9cbed5aab5aeea420d0aa945733bf608449d44fb Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:24 +0300 Subject: RDMA/nldev: Add support for RDMA monitoring Introduce a new netlink command to allow rdma event monitoring. The rdma events supported now are IB device registration/unregistration and net device attachment/detachment. 
Example output of rdma monitor and the commands which trigger the events: $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_0 [NETDEV_ATTACH] dev 6 mlx5_0 port 1 netdev 12 eth9 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs [UNREGISTER] dev 5 rocep8s0f0v0 [UNREGISTER] dev 6 rocep8s0f0v1 [UNREGISTER] dev 7 rocep8s0f0v2 [UNREGISTER] dev 8 rocep8s0f0v3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 2 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 4 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 5 Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-7-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/rdma/rdma_netlink.h | 12 ++++++++++++ include/uapi/rdma/rdma_netlink.h | 15 +++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c2a79aeee113..326deaf56d5d 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include #include +struct ib_device; + enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, @@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, */ bool rdma_nl_chk_listeners(unsigned int group); +/** + * Prepare and send an event message + * @ib: the IB device which triggered the event + * @port_num: the port number which triggered the event - 0 if unused + * @type: the event type + * Returns 0 on success or a negative error code + */ +int rdma_nl_notify_event(struct ib_device *ib, u32 port_num, + enum rdma_nl_notify_event_type type); + struct rdma_link_ops { struct list_head list; const char *type; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2f37568f5556..5f9636d26050 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -15,6 +15,7 @@ enum { enum { RDMA_NL_GROUP_IWPM = 2, RDMA_NL_GROUP_LS, + RDMA_NL_GROUP_NOTIFY, RDMA_NL_NUM_GROUPS }; @@ -305,6 +306,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_DELDEV, + RDMA_NLDEV_CMD_MONITOR, + RDMA_NLDEV_NUM_OPS }; @@ -574,6 +577,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + /* * Always the end */ @@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type { RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */ }; +/* + * Supported rdma monitoring event types. 
+ */ +enum rdma_nl_notify_event_type { + RDMA_REGISTER_EVENT, + RDMA_UNREGISTER_EVENT, + RDMA_NETDEV_ATTACH_EVENT, + RDMA_NETDEV_DETACH_EVENT, +}; + #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 12fb1153c53bf9b53e299c9775b84fa7838640f7 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:25 +0300 Subject: RDMA/nldev: Expose whether RDMA monitoring is supported Extend the "rdma sys" command to display whether RDMA monitoring is supported. RDMA monitoring is not supported in mlx4 because it does not use the ib_device_set_netdev() API, which sends the RDMA events. Example output for kernel where monitoring is supported: $ rdma sys show netns shared privileged-qkey off monitor on copy-on-fork on Example output for kernel where monitoring is not supported: $ rdma sys show netns shared privileged-qkey off monitor off copy-on-fork on Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-8-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5f9636d26050..39be09c0ffbb 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -579,6 +579,7 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ /* * Always the end */ -- cgit v1.2.3 From 96f8052822e03c6f49b6b28fc1d6e5e0522ecbb9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:39:54 -0700 Subject: locking/mutex: Define mutex_init() once With CONFIG_PREEMPT_RT disabled __mutex_init() is a function. With CONFIG_PREEMPT_RT enabled, __mutex_init() is a macro. I assume this is why mutex_init() is defined twice as exactly the same macro. Prepare for introducing a new macro for mutex initialization by combining the two identical mutex_init() definitions into a single definition. This patch does not change any functionality because the C preprocessor expands macros when it encounters the macro name and not when a macro definition is encountered. See also commit bb630f9f7a7d ("locking/rtmutex: Add mutex variant for RT"). 
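Usage is unchanged by the consolidation; a minimal sketch (the surrounding type is illustrative):

    struct my_dev {
            struct mutex lock;
    };

    static void my_dev_init(struct my_dev *d)
    {
            /* same macro on PREEMPT_RT and !PREEMPT_RT; each call site
             * still gets its own static lock_class_key */
            mutex_init(&d->lock);
    }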
Acked-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223956.3554086-2-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mutex.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a561c629d89f..ef617089db19 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -49,7 +49,6 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif -#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -65,6 +64,7 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) +#ifndef CONFIG_PREEMPT_RT #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -111,12 +111,6 @@ do { \ __mutex_rt_init((mutex), name, key); \ } while (0) -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES -- cgit v1.2.3 From e837d833a13461c10f265a65ce6612e6dd43e76f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:39:55 -0700 Subject: locking/mutex: Introduce mutex_init_with_key() The following pattern occurs 5 times in kernel drivers: lockdep_register_key(key); __mutex_init(mutex, name, key); In several cases the 'name' argument matches #mutex. Hence, introduce the mutex_init_with_key() macro. This macro derives the 'name' argument from the 'mutex' argument. Suggested-by: Andy Shevchenko Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andy Shevchenko Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223956.3554086-3-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mutex.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ef617089db19..2bf91b57591b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -64,6 +64,17 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) +/** + * mutex_init_with_key - initialize a mutex with a given lockdep key + * @mutex: the mutex to be initialized + * @key: the lockdep key to be associated with the mutex + * + * Initialize the mutex to the unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ +#define mutex_init_with_key(mutex, key) __mutex_init((mutex), #mutex, (key)) + #ifndef CONFIG_PREEMPT_RT #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ -- cgit v1.2.3 From 4b7ff9ab98af11a477d50f08382bcc4c2f899926 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Sep 2024 10:15:56 +0200 Subject: mm, slab: restore kerneldoc for kmem_cache_create() As kmem_cache_create() became a _Generic() wrapper macro, it currently has no kerneldoc despite being the main API to use. Add it. Also adjust kmem_cache_create_usercopy() kerneldoc to indicate it is now a legacy wrapper. Also expand the kerneldoc for struct kmem_cache_args, especially for the freeptr_offset field, where important details were removed with the removal of kmem_cache_create_rcu(). 
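A short sketch of the new-style call that the restored kerneldoc describes (struct my_obj and its payload field are illustrative, not from the patch):

    static struct kmem_cache *my_obj_cache_create(void)
    {
            struct kmem_cache_args args = {
                    .align      = 64,
                    .useroffset = offsetof(struct my_obj, payload),
                    .usersize   = sizeof_field(struct my_obj, payload),
            };

            return kmem_cache_create("my_obj", sizeof(struct my_obj),
                                     &args, SLAB_HWCACHE_ALIGN);
    }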
Signed-off-by: Vlastimil Babka Reviewed-by: Christian Brauner --- include/linux/slab.h | 114 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 331412a9f4f2..6a8ab7ef3af7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -242,19 +242,72 @@ bool slab_is_available(void); /** * struct kmem_cache_args - Less common arguments for kmem_cache_create() - * @align: The required alignment for the objects. - * @useroffset: Usercopy region offset - * @usersize: Usercopy region size - * @freeptr_offset: Custom offset for the free pointer in RCU caches - * @use_freeptr_offset: Whether a @freeptr_offset is used - * @ctor: A constructor for the objects. + * + * Any uninitialized fields of the structure are interpreted as unused. The + * exception is @freeptr_offset where %0 is a valid value, so + * @use_freeptr_offset must be also set to %true in order to interpret the field + * as used. For @useroffset %0 is also valid, but only with non-%0 + * @usersize. + * + * When %NULL args is passed to kmem_cache_create(), it is equivalent to all + * fields unused. */ struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ unsigned int align; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ void (*ctor)(void *); }; @@ -275,30 +328,20 @@ __kmem_cache_create(const char *name, unsigned int size, unsigned int align, } /** - * kmem_cache_create_usercopy - Create a cache with a region suitable - * for copying to userspace + * kmem_cache_create_usercopy - Create a kmem cache with a region suitable + * for copying to userspace. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. 
* @flags: SLAB flags * @useroffset: Usercopy region offset * @usersize: Usercopy region size - * @ctor: A constructor for the objects. - * - * Cannot be called within a interrupt, but can be interrupted. - * The @ctor is run when new pages are allocated by the cache. - * - * The flags are + * @ctor: A constructor for the objects, or %NULL. * - * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) - * to catch references to uninitialised memory. - * - * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check - * for buffer overruns. - * - * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware - * cacheline. This can be beneficial if you're counting cycles as closely - * as davem. + * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY() + * if whitelisting a single field is sufficient, or kmem_cache_create() with + * the necessary parameters passed via the args parameter (see + * &struct kmem_cache_args) * * Return: a pointer to the cache on success, NULL on failure. */ @@ -333,6 +376,31 @@ __kmem_cache_default_args(const char *name, unsigned int size, return __kmem_cache_create_args(name, size, &kmem_default_args, flags); } +/** + * kmem_cache_create - Create a kmem cache. + * @__name: A string which is used in /proc/slabinfo to identify this cache. + * @__object_size: The size of objects to be created in this cache. + * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL + * means defaults will be used for all the arguments. + * + * This is currently implemented as a macro using ``_Generic()`` to call + * either the new variant of the function, or a legacy one. + * + * The new variant has 4 parameters: + * ``kmem_cache_create(name, object_size, args, flags)`` + * + * See __kmem_cache_create_args() which implements this. + * + * The legacy variant has 5 parameters: + * ``kmem_cache_create(name, object_size, align, flags, ctor)`` + * + * The align and ctor parameters map to the respective fields of + * &struct kmem_cache_args + * + * Context: Cannot be called within a interrupt, but can be interrupted. + * + * Return: a pointer to the cache on success, NULL on failure. + */ #define kmem_cache_create(__name, __object_size, __args, ...) \ _Generic((__args), \ struct kmem_cache_args *: __kmem_cache_create_args, \ -- cgit v1.2.3 From 6a36d828bdef0e02b1e6c12e2160f5b83be6aab5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 13 Sep 2024 02:09:55 +0100 Subject: driver core: attribute_container: Remove unused functions I can't find any use of 'attribute_container_add_class_device_adapter' or 'attribute_container_trigger' in git history. Their export decls went in 2006: commit 1740757e8f94 ("[PATCH] Driver Core: remove unused exports") and their docs disappeared in 2016: commit 47cb398dd75a ("Docs: sphinxify device-drivers.tmpl") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20240913010955.1393995-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/attribute_container.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index e4004d1e6725..b3643de9931d 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev, int (*undo)(struct attribute_container *, struct device *, struct device *)); -void attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)); int attribute_container_add_attrs(struct device *classdev); int attribute_container_add_class_device(struct device *classdev); -int attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev); void attribute_container_remove_attrs(struct device *classdev); void attribute_container_class_device_del(struct device *classdev); struct attribute_container *attribute_container_classdev_to_container(struct device *); -- cgit v1.2.3 From 7860df5b29945cfab40dd667f576af31401d7c43 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 13 Sep 2024 14:36:29 +0530 Subject: ASoC: sdw_util/intel: move soundwire endpoint and dai link structures Move Soundwire endpoint and dai link structures from Intel generic machine driver code to common place holder(soc_sdw_utils.h). These structures will be used in other platform SoundWire machine driver code. Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Link: https://patch.msgid.link/20240913090631.1834543-4-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index e366b7968c2d..e3482720a3eb 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -93,6 +93,27 @@ struct asoc_sdw_mc_private { int codec_info_list_count; }; +struct asoc_sdw_endpoint { + struct list_head list; + + u32 link_mask; + const char *codec_name; + const char *name_prefix; + bool include_sidecar; + + struct asoc_sdw_codec_info *codec_info; + const struct asoc_sdw_dai_info *dai_info; +}; + +struct asoc_sdw_dailink { + bool initialised; + + u8 group_id; + u32 link_mask[SNDRV_PCM_STREAM_LAST + 1]; + int num_devs[SNDRV_PCM_STREAM_LAST + 1]; + struct list_head endpoints; +}; + extern struct asoc_sdw_codec_info codec_info_list[]; int asoc_sdw_get_codec_info_list_count(void); -- cgit v1.2.3 From 13b24f84782d6c0373f62eb645353883d94d1dcd Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 13 Sep 2024 14:36:30 +0530 Subject: ASoC: sdw_utils/intel: move soundwire endpoint parsing helper functions Move SoundWire endpoint parsing helper functions to common place holder. These functions will be used by other platform machine driver code. 
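A hedged sketch (not part of the patch) of how a platform machine driver might call the relocated helpers; allocation and error handling are simplified, and treating a negative return as failure is an assumption:

    static int example_parse_endpoints(struct snd_soc_card *card)
    {
            struct asoc_sdw_dailink *soc_dais;
            struct asoc_sdw_endpoint *soc_ends;
            int num_devs = 0, num_ends = 0, ret;

            ret = asoc_sdw_count_sdw_endpoints(card, &num_devs, &num_ends);
            if (ret < 0)
                    return ret;

            soc_dais = devm_kcalloc(card->dev, num_devs,
                                    sizeof(*soc_dais), GFP_KERNEL);
            soc_ends = devm_kcalloc(card->dev, num_ends,
                                    sizeof(*soc_ends), GFP_KERNEL);
            if (!soc_dais || !soc_ends)
                    return -ENOMEM;

            return asoc_sdw_parse_sdw_endpoints(card, soc_dais, soc_ends,
                                                &num_devs);
    }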
Signed-off-by: Vijendar Mukunda Reviewed-by: Bard Liao Link: https://patch.msgid.link/20240913090631.1834543-5-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index e3482720a3eb..f68c1f193b3b 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -161,6 +161,16 @@ int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *d int (*init)(struct snd_soc_pcm_runtime *rtd), const struct snd_soc_ops *ops); +int asoc_sdw_count_sdw_endpoints(struct snd_soc_card *card, int *num_devs, int *num_ends); + +struct asoc_sdw_dailink *asoc_sdw_find_dailink(struct asoc_sdw_dailink *dailinks, + const struct snd_soc_acpi_endpoint *new); + +int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card, + struct asoc_sdw_dailink *soc_dais, + struct asoc_sdw_endpoint *soc_ends, + int *num_devs); + int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd); /* DMIC support */ -- cgit v1.2.3 From 2cb4acf2140be8a4f299c0b887cc314845ef6ec8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 13 Sep 2024 07:11:42 -0700 Subject: hwmon: Remove devm_hwmon_device_unregister() API function devm_hwmon_device_unregister() has no in-tree user, and its implementation is wrong since it does not pass the to-be-removed hardware monitoring device as parameter. I do not envision a valid use for it; drivers needing it should not have called devm_hwmon_device_register_with_info() in the first place. Remove it. Reported-by: Matthew Sanders Closes: https://lore.kernel.org/linux-hwmon/488b3bdf870ea76c4b943dbe5fd15ac8113019dc.camel@kernel.org/ Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index e94314760aab..5c6a421ad580 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -481,7 +481,6 @@ devm_hwmon_device_register_with_info(struct device *dev, const struct attribute_group **extra_groups); void hwmon_device_unregister(struct device *dev); -void devm_hwmon_device_unregister(struct device *dev); int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel); -- cgit v1.2.3 From 6fd13b282f62bd840f2410692deaa23a42fd91fa Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 27 Aug 2024 17:14:18 +0200 Subject: random: vDSO: move prototype of arch chacha function to vdso/getrandom.h Having the prototype for __arch_chacha20_blocks_nostack in arch/x86/include/asm/vdso/getrandom.h meant that the prototype and large doc comment were cloned by every architecture, which has been causing unnecessary churn. Instead move it into include/vdso/getrandom.h, where it can be shared by all archs implementing it. As a side bonus, this then lets us use that prototype in the vdso_test_chacha self test, to ensure that it matches the source, and indeed doing so turned up some inconsistencies, which are rectified here. Suggested-by: Christophe Leroy Signed-off-by: Jason A. 
Donenfeld --- include/vdso/getrandom.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h index a8b7c14b0ae0..4cf02e678f5e 100644 --- a/include/vdso/getrandom.h +++ b/include/vdso/getrandom.h @@ -43,4 +43,17 @@ struct vgetrandom_state { bool in_use; }; +/** + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. + * @key: 32-byte input key. + * @counter: 8-byte counter, read on input and updated on return. + * @nblocks: Number of blocks to generate. + * + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data + * leaking into forked child processes. + */ +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); + #endif /* _VDSO_GETRANDOM_H */ -- cgit v1.2.3 From 7f053812dab3946cb704520b72c381f605ecdf95 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 27 Aug 2024 09:31:47 +0200 Subject: random: vDSO: minimize and simplify header includes Depending on the architecture, building a 32-bit vDSO on a 64-bit kernel is problematic when some system headers are included. Minimise the amount of headers by moving needed items, such as __{get,put}_unaligned_t, into dedicated common headers and in general use more specific headers, similar to what was done in commit 8165b57bca21 ("linux/const.h: Extract common header for vDSO") and commit 8c59ab839f52 ("lib/vdso: Enable common headers"). On some architectures this results in missing PAGE_SIZE, as was described by commit 8b3843ae3634 ("vdso/datapage: Quick fix - use asm/page-def.h for ARM64"), so define this if necessary, in the same way as done prior by commit cffaefd15a8f ("vdso: Use CONFIG_PAGE_SHIFT in vdso/datapage.h"). Removing linux/time64.h leads to missing 'struct timespec64' in x86's asm/pvclock.h. Add a forward declaration of that struct in that file. Signed-off-by: Christophe Leroy Signed-off-by: Jason A. 
Donenfeld --- include/asm-generic/unaligned.h | 11 +---------- include/vdso/helpers.h | 1 + include/vdso/unaligned.h | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 include/vdso/unaligned.h (limited to 'include') diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index a84c64e5f11e..95acdd70b3b2 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -8,16 +8,7 @@ */ #include #include - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 73501149439d..3ddb03bb05cb 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ +#include #include static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ -- cgit v1.2.3 From 4d456f0c87c95efb6197a30cd76b5b6ee5fb012e Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 1 Sep 2024 14:13:10 +0800 Subject: random: vDSO: add a __vdso_getrandom prototype for all architectures Without a prototype, we'll have to add a prototype for each architecture implementing vDSO getrandom. As most architectures will likely have the vDSO getrandom implemented in a near future, and we'd like to keep the declarations compatible everywhere (to ease the libc implementor work), we should really just have one copy of the prototype. This also is what's already done inside of include/vdso/gettime.h for those vDSO functions, so this continues that convention. Suggested-by: Huacai Chen Signed-off-by: Xi Ruoyao Acked-by: Huacai Chen [Jason: rewrite docbook comment for prototype.] Signed-off-by: Jason A. Donenfeld --- include/vdso/getrandom.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h index 4cf02e678f5e..6ca4d6de9e46 100644 --- a/include/vdso/getrandom.h +++ b/include/vdso/getrandom.h @@ -56,4 +56,19 @@ struct vgetrandom_state { */ extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); +/** + * __vdso_getrandom - Architecture-specific vDSO implementation of getrandom() syscall. + * @buffer: Passed to __cvdso_getrandom(). + * @len: Passed to __cvdso_getrandom(). + * @flags: Passed to __cvdso_getrandom(). + * @opaque_state: Passed to __cvdso_getrandom(). 
+ * @opaque_len: Passed to __cvdso_getrandom(); + * + * This function is implemented by making a single call to to __cvdso_getrandom(), whose + * documentation may be consulted for more information. + * + * Returns: The return value of __cvdso_getrandom(). + */ +extern ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); + #endif /* _VDSO_GETRANDOM_H */ -- cgit v1.2.3 From d175ee98fe545d2c56df22751314584cce228307 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 2 Sep 2024 21:17:18 +0200 Subject: mm: Define VM_DROPPABLE for powerpc/32 Commit 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") only adds VM_DROPPABLE for 64 bits architectures. In order to also use the getrandom vDSO implementation on powerpc/32, use VM_ARCH_1 for VM_DROPPABLE on powerpc/32. This is possible because VM_ARCH_1 is used for VM_SAO on powerpc and VM_SAO is only for powerpc/64. It is used in combination with PROT_SAO in some parts of code that are restricted to CONFIG_PPC64 through #ifdefs, it is therefore possible to define VM_SAO for CONFIG_PPC64 only. Signed-off-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Jason A. Donenfeld --- include/linux/mm.h | 4 +++- include/trace/events/mmflags.h | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..028847f39442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -359,7 +359,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 @@ -409,6 +409,8 @@ extern unsigned int kobjsize(const void *objp); #ifdef CONFIG_64BIT #define VM_DROPPABLE_BIT 40 #define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) +#elif defined(CONFIG_PPC32) +#define VM_DROPPABLE VM_ARCH_1 #else #define VM_DROPPABLE VM_NONE #endif diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..37265977d524 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -143,7 +143,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } @@ -165,7 +165,7 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, #else # define IF_HAVE_VM_DROPPABLE(flag, name) -- cgit v1.2.3 From 49e2e353fb0dbef8dced3e8e65365580349c4b14 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 12 Sep 2024 07:27:37 +0800 Subject: ASoC: tas2781: Add Calibration Kcontrols for Chromebook Add calibration related kcontrol for speaker impedance calibration and speaker leakage check for Chromebook. 
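A small sketch (not part of the patch) of how the new register constants added below plug into the existing read helper; the channel index is illustrative:

    static int example_read_runtime_re(struct tasdevice_priv *tas_priv,
                                       unsigned int *re)
    {
            return tasdevice_dev_read(tas_priv, 0 /* channel */,
                                      TAS2781_RUNTIME_RE_REG, re);
    }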
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240911232739.1509-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index dbda552398b5..8cd6da0480b7 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -49,12 +49,59 @@ /*I2C Checksum */ #define TASDEVICE_I2CChecksum TASDEVICE_REG(0x0, 0x0, 0x7E) +/* XM_340 */ +#define TASDEVICE_XM_A1_REG TASDEVICE_REG(0x64, 0x63, 0x3c) +/* XM_341 */ +#define TASDEVICE_XM_A2_REG TASDEVICE_REG(0x64, 0x63, 0x38) + /* Volume control */ #define TAS2563_DVC_LVL TASDEVICE_REG(0x00, 0x02, 0x0C) #define TAS2781_DVC_LVL TASDEVICE_REG(0x0, 0x0, 0x1A) #define TAS2781_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) #define TAS2781_AMP_LEVEL_MASK GENMASK(5, 1) +#define TAS2563_IDLE TASDEVICE_REG(0x00, 0x00, 0x3e) +#define TAS2563_PRM_R0_REG TASDEVICE_REG(0x00, 0x0f, 0x34) + +#define TAS2563_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x02, 0x70) +#define TAS2563_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x02, 0x48) + +#define TAS2563_PRM_ENFF_REG TASDEVICE_REG(0x00, 0x0d, 0x54) +#define TAS2563_PRM_DISTCK_REG TASDEVICE_REG(0x00, 0x0d, 0x58) +#define TAS2563_PRM_TE_SCTHR_REG TASDEVICE_REG(0x00, 0x0f, 0x60) +#define TAS2563_PRM_PLT_FLAG_REG TASDEVICE_REG(0x00, 0x0d, 0x74) +#define TAS2563_PRM_SINEGAIN_REG TASDEVICE_REG(0x00, 0x0d, 0x7c) +/* prm_Int_B0 */ +#define TAS2563_TE_TA1_REG TASDEVICE_REG(0x00, 0x10, 0x0c) +/* prm_Int_A1 */ +#define TAS2563_TE_TA1_AT_REG TASDEVICE_REG(0x00, 0x10, 0x10) +/* prm_TE_Beta */ +#define TAS2563_TE_TA2_REG TASDEVICE_REG(0x00, 0x0f, 0x64) +/* prm_TE_Beta1 */ +#define TAS2563_TE_AT_REG TASDEVICE_REG(0x00, 0x0f, 0x68) +/* prm_TE_1_Beta1 */ +#define TAS2563_TE_DT_REG TASDEVICE_REG(0x00, 0x0f, 0x70) + +#define TAS2781_PRM_INT_MASK_REG TASDEVICE_REG(0x00, 0x00, 0x3b) +#define TAS2781_PRM_CLK_CFG_REG TASDEVICE_REG(0x00, 0x00, 0x5c) +#define TAS2781_PRM_RSVD_REG TASDEVICE_REG(0x00, 0x01, 0x19) +#define TAS2781_PRM_TEST_57_REG TASDEVICE_REG(0x00, 0xfd, 0x39) +#define TAS2781_PRM_TEST_62_REG TASDEVICE_REG(0x00, 0xfd, 0x3e) +#define TAS2781_PRM_PVDD_UVLO_REG TASDEVICE_REG(0x00, 0x00, 0x71) +#define TAS2781_PRM_CHNL_0_REG TASDEVICE_REG(0x00, 0x00, 0x03) +#define TAS2781_PRM_NG_CFG0_REG TASDEVICE_REG(0x00, 0x00, 0x35) +#define TAS2781_PRM_IDLE_CH_DET_REG TASDEVICE_REG(0x00, 0x00, 0x66) +#define TAS2781_PRM_PLT_FLAG_REG TASDEVICE_REG(0x00, 0x14, 0x38) +#define TAS2781_PRM_SINEGAIN_REG TASDEVICE_REG(0x00, 0x14, 0x40) +#define TAS2781_PRM_SINEGAIN2_REG TASDEVICE_REG(0x00, 0x14, 0x44) + +#define TAS2781_TEST_UNLOCK_REG TASDEVICE_REG(0x00, 0xFD, 0x0D) +#define TAS2781_TEST_PAGE_UNLOCK 0x0D + +#define TAS2781_RUNTIME_LATCH_RE_REG TASDEVICE_REG(0x00, 0x00, 0x49) +#define TAS2781_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x62, 0x48) +#define TAS2781_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x63, 0x44) + #define TASDEVICE_CMD_SING_W 0x1 #define TASDEVICE_CMD_BURST 0x2 #define TASDEVICE_CMD_DELAY 0x3 @@ -70,7 +117,15 @@ enum device_catlog_id { OTHERS }; +struct bulk_reg_val { + int reg; + unsigned char val[4]; + unsigned char val_len; + bool is_locked; +}; + struct tasdevice { + struct bulk_reg_val *cali_data_backup; struct tasdevice_fw *cali_data_fmw; unsigned int dev_addr; unsigned int err_code; @@ -81,9 +136,19 @@ struct tasdevice { bool is_loaderr; }; +struct cali_reg { + unsigned int r0_reg; + unsigned int r0_low_reg; + unsigned int invr0_reg; + unsigned int 
pow_reg; + unsigned int tlimit_reg; +}; + struct calidata { unsigned char *data; unsigned long total_sz; + struct cali_reg cali_reg_array; + unsigned int cali_dat_sz_per_dev; }; struct tasdevice_priv { @@ -119,6 +184,7 @@ struct tasdevice_priv { bool force_fwload_status; bool playback_started; bool isacpi; + bool is_user_space_calidata; unsigned int global_addr; int (*fw_parse_variable_header)(struct tasdevice_priv *tas_priv, @@ -145,6 +211,8 @@ int tasdevice_init(struct tasdevice_priv *tas_priv); void tasdevice_remove(struct tasdevice_priv *tas_priv); int tasdevice_save_calibration(struct tasdevice_priv *tas_priv); void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv); +int tasdev_chn_switch(struct tasdevice_priv *tas_priv, + unsigned short chn); int tasdevice_dev_read(struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned int *value); int tasdevice_dev_write(struct tasdevice_priv *tas_priv, -- cgit v1.2.3 From 2b018086143d638de8d67ae5be6e8c1afb413193 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 11:28:46 -0700 Subject: blk-mq: unconditional nr_integrity_segments Always defining the field will make using it easier and less error prone in future patches. There shouldn't be any downside to this: the field fits in what would otherwise be a 2-byte hole, so we're not saving space by conditionally leaving it out. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913182854.2445457-2-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8d304b1d16b1..4fecf46ef681 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -149,10 +149,7 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; - -#ifdef CONFIG_BLK_DEV_INTEGRITY unsigned short nr_integrity_segments; -#endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; -- cgit v1.2.3 From d2c5b1faccd5ef6352456f817e941945d3b3fe62 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 11:28:50 -0700 Subject: block: provide a request helper for user integrity segments Provide a helper to keep the request flags and nr_integrity_segments in sync with the bio's integrity payload. This is an integrity equivalent to the normal data helper function, 'blk_rq_map_user()'. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913182854.2445457-6-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index de98049b7ded..793dbb1e0672 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -28,6 +28,8 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, + ssize_t bytes, u32 seed); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -102,6 +104,13 @@ static inline int blk_rq_map_integrity_sg(struct request_queue *q, { return 0; } +static inline int blk_rq_integrity_map_user(struct request *rq, + void __user *ubuf, + ssize_t bytes, + u32 seed) +{ + return -EINVAL; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; -- cgit v1.2.3 From 58b3ac2447c22189cd3a26ac1babb2dae13fd514 Mon Sep 17 00:00:00 2001 From: Nikunj Kela Date: Tue, 10 Sep 2024 09:26:37 -0700 Subject: dt-bindings: interrupt-controller: arm,gic: add ESPI and EPPI specifiers Extended SPI and extended PPI interrupts are in the range [0-1023] and [0-127] respectively, supported by GICv3.1. Qualcomm SA8255p platform uses extended SPI for SCMI 'a2p' doorbells. Signed-off-by: Nikunj Kela Link: https://lore.kernel.org/r/20240910162637.2382656-1-quic_nkela@quicinc.com Signed-off-by: Rob Herring (Arm) --- include/dt-bindings/interrupt-controller/arm-gic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/interrupt-controller/arm-gic.h b/include/dt-bindings/interrupt-controller/arm-gic.h index 35b6f69b7db6..887f53363e8a 100644 --- a/include/dt-bindings/interrupt-controller/arm-gic.h +++ b/include/dt-bindings/interrupt-controller/arm-gic.h @@ -12,6 +12,8 @@ #define GIC_SPI 0 #define GIC_PPI 1 +#define GIC_ESPI 2 +#define GIC_EPPI 3 /* * Interrupt specifier cell 2. -- cgit v1.2.3 From 76c313f658d2752e8527610677164aa7094ef7a5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Sep 2024 12:17:46 -0700 Subject: blk-integrity: improved sg segment mapping Make the integrity mapping more like data mapping, blk_rq_map_sg. Use the request to validate the segment count, and update the callers so they don't have to. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20240913191746.2628196-1-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 793dbb1e0672..676f8f860c47 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -25,8 +25,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, } #ifdef CONFIG_BLK_DEV_INTEGRITY -int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, - struct scatterlist *); +int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes, u32 seed); @@ -98,8 +97,7 @@ static inline int blk_rq_count_integrity_sg(struct request_queue *q, { return 0; } -static inline int blk_rq_map_integrity_sg(struct request_queue *q, - struct bio *b, +static inline int blk_rq_map_integrity_sg(struct request *q, struct scatterlist *s) { return 0; -- cgit v1.2.3 From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:48 +0200 Subject: bpf: Fix helper writes to read-only maps Lonial found an issue that despite user- and BPF-side frozen BPF map (like in case of .rodata), it was still possible to write into it from a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT} as arguments. In check_func_arg() when the argument is as mentioned, the meta->raw_mode is never set. Later, check_helper_mem_access(), under the case of PTR_TO_MAP_VALUE as register base type, it assumes BPF_READ for the subsequent call to check_map_access_type() and given the BPF map is read-only it succeeds. The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT when results are written into them as opposed to read out of them. The latter indicates that it's okay to pass a pointer to uninitialized memory as the memory is written to anyway. However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM just with additional alignment requirement. So it is better to just get rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the fixed size memory types. For this, add MEM_ALIGNED to additionally ensure alignment given these helpers write directly into the args via * = val. The .arg*_size has been initialized reflecting the actual sizeof(*). MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated argument types, since in !MEM_FIXED_SIZE cases the verifier does not know the buffer size a priori and therefore cannot blindly write * = val. 
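As a rough illustration of the new annotation scheme (a hypothetical helper, not one converted by this series; bpf_example() and its proto exist only for this sketch), a helper that fills a 64-bit result through its pointer argument would now be declared along these lines:

	#include <linux/bpf.h>
	#include <linux/filter.h>

	BPF_CALL_1(bpf_example, long *, res)
	{
		*res = 42;	/* the helper writes through the pointer */
		return 0;
	}

	static const struct bpf_func_proto bpf_example_proto = {
		.func		= bpf_example,
		.ret_type	= RET_INTEGER,
		/* was ARG_PTR_TO_LONG; now a fixed-size, uninitialized, aligned buffer */
		.arg1_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
		.arg1_size	= sizeof(long),
	};

Because MEM_UNINIT marks the access as a write, the verifier can now reject a read-only map value passed as such an argument.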
Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f3e8477d5e7..0c3893c47171 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -695,6 +695,11 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -732,8 +737,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ -- cgit v1.2.3 From 0cca961a026177af69044f10d6ae76d8ce043764 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 12 Sep 2024 11:00:25 +0530 Subject: PCI: Pass domain number to pci_bus_release_domain_nr() explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pci_bus_release_domain_nr() API is supposed to free the domain number allocated by pci_bus_find_domain_nr(). Most of the callers of pci_bus_find_domain_nr() store the domain number in pci_bus::domain_nr. As such, pci_bus_release_domain_nr() implicitly frees the domain number by dereferencing 'struct pci_bus'. However, one of the callers of this API, the PCI endpoint subsystem, doesn't have 'struct pci_bus', so it only passes NULL. Due to this, the API will end up dereferencing the NULL pointer. To fix this issue, pass the domain number to this API explicitly. Since 'struct pci_bus' is not used for anything other than extracting the domain number, it makes sense to pass the domain number directly. 
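For illustration only (the function and variable names below are invented for this note and are not part of the patch), a caller that has no struct pci_bus, such as the endpoint subsystem, now hands over the number it allocated earlier instead of a NULL bus pointer:

	/* Sketch: release a domain number obtained from pci_bus_find_domain_nr(). */
	static void example_release_domain(struct device *parent, int domain_nr)
	{
		/* Only dynamically allocated domain numbers need to be returned. */
		if (domain_nr > 0)
			pci_bus_release_domain_nr(parent, domain_nr);
	}

Bus-based callers simply pass bus->domain_nr (and a suitable parent device) themselves.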
Fixes: 0328947c5032 ("PCI: endpoint: Assign PCI domain number for endpoint controllers") Closes: https://lore.kernel.org/linux-pci/c0c40ddb-bf64-4b22-9dd1-8dbb18aa2813@stanley.mountain Link: https://lore.kernel.org/linux-pci/20240912053025.25314-1-manivannan.sadhasivam@linaro.org Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..37d97bef060f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1884,7 +1884,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); -void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct device *parent, int domain_nr); #endif /* Some architectures require additional setup to direct VGA traffic */ -- cgit v1.2.3 From c951a29f6ba52b86223eb00bbcff43142d59a901 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:43 +0300 Subject: net: fib_rules: Add DSCP selector attribute The FIB rule TOS selector is implemented differently between IPv4 and IPv6. In IPv4 it is used to match on the three "Type of Services" bits specified in RFC 791, while in IPv6 it is used to match on the six DSCP bits specified in RFC 2474. Add a new FIB rule attribute to allow matching on DSCP. The attribute will be used to implement a 'dscp' selector in ip-rule with a consistent behavior between IPv4 and IPv6. For now, set the type of the attribute to 'NLA_REJECT' so that user space will not be able to configure it. This restriction will be lifted once both IPv4 and IPv6 support the new attribute. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/fib_rules.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 232df14e1287..a6924dd3aff1 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -67,6 +67,7 @@ enum { FRA_IP_PROTO, /* ip proto */ FRA_SPORT_RANGE, /* sport */ FRA_DPORT_RANGE, /* dport */ + FRA_DSCP, /* dscp */ __FRA_MAX }; -- cgit v1.2.3 From 1b8c9cb3151a541a4197d2bb449233064f747832 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 12 Sep 2024 09:19:49 +0800 Subject: MIPS: Remove the obsoleted code for include/linux/mv643xx.h Most of the drivers which used this header have been deleted, and most of this code is obsolete, so move the only defines that are actually used into arch/powerpc/platforms/chrp/pegasos_eth.c and delete the file completely. Signed-off-by: Gaosheng Cui Link: https://patch.msgid.link/20240912011949.2726928-1-cuigaosheng1@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/mv643xx.h | 921 ------------------------------------------------ 1 file changed, 921 deletions(-) delete mode 100644 include/linux/mv643xx.h (limited to 'include') diff --git a/include/linux/mv643xx.h b/include/linux/mv643xx.h deleted file mode 100644 index 000b126acfb6..000000000000 --- a/include/linux/mv643xx.h +++ /dev/null @@ -1,921 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mv643xx.h - MV-643XX Internal registers definition file. 
- * - * Copyright 2002 Momentum Computer, Inc. - * Author: Matthew Dharm - * Copyright 2002 GALILEO TECHNOLOGY, LTD. - */ -#ifndef __ASM_MV643XX_H -#define __ASM_MV643XX_H - -#include -#include -#include - -/****************************************/ -/* Processor Address Space */ -/****************************************/ - -/* DDR SDRAM BAR and size registers */ - -#define MV64340_CS_0_BASE_ADDR 0x008 -#define MV64340_CS_0_SIZE 0x010 -#define MV64340_CS_1_BASE_ADDR 0x208 -#define MV64340_CS_1_SIZE 0x210 -#define MV64340_CS_2_BASE_ADDR 0x018 -#define MV64340_CS_2_SIZE 0x020 -#define MV64340_CS_3_BASE_ADDR 0x218 -#define MV64340_CS_3_SIZE 0x220 - -/* Devices BAR and size registers */ - -#define MV64340_DEV_CS0_BASE_ADDR 0x028 -#define MV64340_DEV_CS0_SIZE 0x030 -#define MV64340_DEV_CS1_BASE_ADDR 0x228 -#define MV64340_DEV_CS1_SIZE 0x230 -#define MV64340_DEV_CS2_BASE_ADDR 0x248 -#define MV64340_DEV_CS2_SIZE 0x250 -#define MV64340_DEV_CS3_BASE_ADDR 0x038 -#define MV64340_DEV_CS3_SIZE 0x040 -#define MV64340_BOOTCS_BASE_ADDR 0x238 -#define MV64340_BOOTCS_SIZE 0x240 - -/* PCI 0 BAR and size registers */ - -#define MV64340_PCI_0_IO_BASE_ADDR 0x048 -#define MV64340_PCI_0_IO_SIZE 0x050 -#define MV64340_PCI_0_MEMORY0_BASE_ADDR 0x058 -#define MV64340_PCI_0_MEMORY0_SIZE 0x060 -#define MV64340_PCI_0_MEMORY1_BASE_ADDR 0x080 -#define MV64340_PCI_0_MEMORY1_SIZE 0x088 -#define MV64340_PCI_0_MEMORY2_BASE_ADDR 0x258 -#define MV64340_PCI_0_MEMORY2_SIZE 0x260 -#define MV64340_PCI_0_MEMORY3_BASE_ADDR 0x280 -#define MV64340_PCI_0_MEMORY3_SIZE 0x288 - -/* PCI 1 BAR and size registers */ -#define MV64340_PCI_1_IO_BASE_ADDR 0x090 -#define MV64340_PCI_1_IO_SIZE 0x098 -#define MV64340_PCI_1_MEMORY0_BASE_ADDR 0x0a0 -#define MV64340_PCI_1_MEMORY0_SIZE 0x0a8 -#define MV64340_PCI_1_MEMORY1_BASE_ADDR 0x0b0 -#define MV64340_PCI_1_MEMORY1_SIZE 0x0b8 -#define MV64340_PCI_1_MEMORY2_BASE_ADDR 0x2a0 -#define MV64340_PCI_1_MEMORY2_SIZE 0x2a8 -#define MV64340_PCI_1_MEMORY3_BASE_ADDR 0x2b0 -#define MV64340_PCI_1_MEMORY3_SIZE 0x2b8 - -/* SRAM base address */ -#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268 - -/* internal registers space base address */ -#define MV64340_INTERNAL_SPACE_BASE_ADDR 0x068 - -/* Enables the CS , DEV_CS , PCI 0 and PCI 1 - windows above */ -#define MV64340_BASE_ADDR_ENABLE 0x278 - -/****************************************/ -/* PCI remap registers */ -/****************************************/ - /* PCI 0 */ -#define MV64340_PCI_0_IO_ADDR_REMAP 0x0f0 -#define MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP 0x0f8 -#define MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP 0x320 -#define MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP 0x100 -#define MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP 0x328 -#define MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP 0x2f8 -#define MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP 0x330 -#define MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP 0x300 -#define MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP 0x338 - /* PCI 1 */ -#define MV64340_PCI_1_IO_ADDR_REMAP 0x108 -#define MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP 0x110 -#define MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP 0x340 -#define MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP 0x118 -#define MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP 0x348 -#define MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP 0x310 -#define MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP 0x350 -#define MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP 0x318 -#define MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP 0x358 - -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_CONTROL 0x3b0 -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_BASE 0x3b8 -#define MV64340_CPU_PCI_1_HEADERS_RETARGET_CONTROL 0x3c0 
-#define MV64340_CPU_PCI_1_HEADERS_RETARGET_BASE 0x3c8 -#define MV64340_CPU_GE_HEADERS_RETARGET_CONTROL 0x3d0 -#define MV64340_CPU_GE_HEADERS_RETARGET_BASE 0x3d8 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_CONTROL 0x3e0 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_BASE 0x3e8 - -/****************************************/ -/* CPU Control Registers */ -/****************************************/ - -#define MV64340_CPU_CONFIG 0x000 -#define MV64340_CPU_MODE 0x120 -#define MV64340_CPU_MASTER_CONTROL 0x160 -#define MV64340_CPU_CROSS_BAR_CONTROL_LOW 0x150 -#define MV64340_CPU_CROSS_BAR_CONTROL_HIGH 0x158 -#define MV64340_CPU_CROSS_BAR_TIMEOUT 0x168 - -/****************************************/ -/* SMP RegisterS */ -/****************************************/ - -#define MV64340_SMP_WHO_AM_I 0x200 -#define MV64340_SMP_CPU0_DOORBELL 0x214 -#define MV64340_SMP_CPU0_DOORBELL_CLEAR 0x21C -#define MV64340_SMP_CPU1_DOORBELL 0x224 -#define MV64340_SMP_CPU1_DOORBELL_CLEAR 0x22C -#define MV64340_SMP_CPU0_DOORBELL_MASK 0x234 -#define MV64340_SMP_CPU1_DOORBELL_MASK 0x23C -#define MV64340_SMP_SEMAPHOR0 0x244 -#define MV64340_SMP_SEMAPHOR1 0x24c -#define MV64340_SMP_SEMAPHOR2 0x254 -#define MV64340_SMP_SEMAPHOR3 0x25c -#define MV64340_SMP_SEMAPHOR4 0x264 -#define MV64340_SMP_SEMAPHOR5 0x26c -#define MV64340_SMP_SEMAPHOR6 0x274 -#define MV64340_SMP_SEMAPHOR7 0x27c - -/****************************************/ -/* CPU Sync Barrier Register */ -/****************************************/ - -#define MV64340_CPU_0_SYNC_BARRIER_TRIGGER 0x0c0 -#define MV64340_CPU_0_SYNC_BARRIER_VIRTUAL 0x0c8 -#define MV64340_CPU_1_SYNC_BARRIER_TRIGGER 0x0d0 -#define MV64340_CPU_1_SYNC_BARRIER_VIRTUAL 0x0d8 - -/****************************************/ -/* CPU Access Protect */ -/****************************************/ - -#define MV64340_CPU_PROTECT_WINDOW_0_BASE_ADDR 0x180 -#define MV64340_CPU_PROTECT_WINDOW_0_SIZE 0x188 -#define MV64340_CPU_PROTECT_WINDOW_1_BASE_ADDR 0x190 -#define MV64340_CPU_PROTECT_WINDOW_1_SIZE 0x198 -#define MV64340_CPU_PROTECT_WINDOW_2_BASE_ADDR 0x1a0 -#define MV64340_CPU_PROTECT_WINDOW_2_SIZE 0x1a8 -#define MV64340_CPU_PROTECT_WINDOW_3_BASE_ADDR 0x1b0 -#define MV64340_CPU_PROTECT_WINDOW_3_SIZE 0x1b8 - - -/****************************************/ -/* CPU Error Report */ -/****************************************/ - -#define MV64340_CPU_ERROR_ADDR_LOW 0x070 -#define MV64340_CPU_ERROR_ADDR_HIGH 0x078 -#define MV64340_CPU_ERROR_DATA_LOW 0x128 -#define MV64340_CPU_ERROR_DATA_HIGH 0x130 -#define MV64340_CPU_ERROR_PARITY 0x138 -#define MV64340_CPU_ERROR_CAUSE 0x140 -#define MV64340_CPU_ERROR_MASK 0x148 - -/****************************************/ -/* CPU Interface Debug Registers */ -/****************************************/ - -#define MV64340_PUNIT_SLAVE_DEBUG_LOW 0x360 -#define MV64340_PUNIT_SLAVE_DEBUG_HIGH 0x368 -#define MV64340_PUNIT_MASTER_DEBUG_LOW 0x370 -#define MV64340_PUNIT_MASTER_DEBUG_HIGH 0x378 -#define MV64340_PUNIT_MMASK 0x3e4 - -/****************************************/ -/* Integrated SRAM Registers */ -/****************************************/ - -#define MV64340_SRAM_CONFIG 0x380 -#define MV64340_SRAM_TEST_MODE 0X3F4 -#define MV64340_SRAM_ERROR_CAUSE 0x388 -#define MV64340_SRAM_ERROR_ADDR 0x390 -#define MV64340_SRAM_ERROR_ADDR_HIGH 0X3F8 -#define MV64340_SRAM_ERROR_DATA_LOW 0x398 -#define MV64340_SRAM_ERROR_DATA_HIGH 0x3a0 -#define MV64340_SRAM_ERROR_DATA_PARITY 0x3a8 - -/****************************************/ -/* SDRAM Configuration */ -/****************************************/ - -#define 
MV64340_SDRAM_CONFIG 0x1400 -#define MV64340_D_UNIT_CONTROL_LOW 0x1404 -#define MV64340_D_UNIT_CONTROL_HIGH 0x1424 -#define MV64340_SDRAM_TIMING_CONTROL_LOW 0x1408 -#define MV64340_SDRAM_TIMING_CONTROL_HIGH 0x140c -#define MV64340_SDRAM_ADDR_CONTROL 0x1410 -#define MV64340_SDRAM_OPEN_PAGES_CONTROL 0x1414 -#define MV64340_SDRAM_OPERATION 0x1418 -#define MV64340_SDRAM_MODE 0x141c -#define MV64340_EXTENDED_DRAM_MODE 0x1420 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_LOW 0x1430 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_HIGH 0x1434 -#define MV64340_SDRAM_CROSS_BAR_TIMEOUT 0x1438 -#define MV64340_SDRAM_ADDR_CTRL_PADS_CALIBRATION 0x14c0 -#define MV64340_SDRAM_DATA_PADS_CALIBRATION 0x14c4 - -/****************************************/ -/* SDRAM Error Report */ -/****************************************/ - -#define MV64340_SDRAM_ERROR_DATA_LOW 0x1444 -#define MV64340_SDRAM_ERROR_DATA_HIGH 0x1440 -#define MV64340_SDRAM_ERROR_ADDR 0x1450 -#define MV64340_SDRAM_RECEIVED_ECC 0x1448 -#define MV64340_SDRAM_CALCULATED_ECC 0x144c -#define MV64340_SDRAM_ECC_CONTROL 0x1454 -#define MV64340_SDRAM_ECC_ERROR_COUNTER 0x1458 - -/******************************************/ -/* Controlled Delay Line (CDL) Registers */ -/******************************************/ - -#define MV64340_DFCDL_CONFIG0 0x1480 -#define MV64340_DFCDL_CONFIG1 0x1484 -#define MV64340_DLL_WRITE 0x1488 -#define MV64340_DLL_READ 0x148c -#define MV64340_SRAM_ADDR 0x1490 -#define MV64340_SRAM_DATA0 0x1494 -#define MV64340_SRAM_DATA1 0x1498 -#define MV64340_SRAM_DATA2 0x149c -#define MV64340_DFCL_PROBE 0x14a0 - -/******************************************/ -/* Debug Registers */ -/******************************************/ - -#define MV64340_DUNIT_DEBUG_LOW 0x1460 -#define MV64340_DUNIT_DEBUG_HIGH 0x1464 -#define MV64340_DUNIT_MMASK 0X1b40 - -/****************************************/ -/* Device Parameters */ -/****************************************/ - -#define MV64340_DEVICE_BANK0_PARAMETERS 0x45c -#define MV64340_DEVICE_BANK1_PARAMETERS 0x460 -#define MV64340_DEVICE_BANK2_PARAMETERS 0x464 -#define MV64340_DEVICE_BANK3_PARAMETERS 0x468 -#define MV64340_DEVICE_BOOT_BANK_PARAMETERS 0x46c -#define MV64340_DEVICE_INTERFACE_CONTROL 0x4c0 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_LOW 0x4c8 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_HIGH 0x4cc -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_TIMEOUT 0x4c4 - -/****************************************/ -/* Device interrupt registers */ -/****************************************/ - -#define MV64340_DEVICE_INTERRUPT_CAUSE 0x4d0 -#define MV64340_DEVICE_INTERRUPT_MASK 0x4d4 -#define MV64340_DEVICE_ERROR_ADDR 0x4d8 -#define MV64340_DEVICE_ERROR_DATA 0x4dc -#define MV64340_DEVICE_ERROR_PARITY 0x4e0 - -/****************************************/ -/* Device debug registers */ -/****************************************/ - -#define MV64340_DEVICE_DEBUG_LOW 0x4e4 -#define MV64340_DEVICE_DEBUG_HIGH 0x4e8 -#define MV64340_RUNIT_MMASK 0x4f0 - -/****************************************/ -/* PCI Slave Address Decoding registers */ -/****************************************/ - -#define MV64340_PCI_0_CS_0_BANK_SIZE 0xc08 -#define MV64340_PCI_1_CS_0_BANK_SIZE 0xc88 -#define MV64340_PCI_0_CS_1_BANK_SIZE 0xd08 -#define MV64340_PCI_1_CS_1_BANK_SIZE 0xd88 -#define MV64340_PCI_0_CS_2_BANK_SIZE 0xc0c -#define MV64340_PCI_1_CS_2_BANK_SIZE 0xc8c -#define MV64340_PCI_0_CS_3_BANK_SIZE 0xd0c -#define MV64340_PCI_1_CS_3_BANK_SIZE 0xd8c -#define MV64340_PCI_0_DEVCS_0_BANK_SIZE 0xc10 -#define MV64340_PCI_1_DEVCS_0_BANK_SIZE 
0xc90 -#define MV64340_PCI_0_DEVCS_1_BANK_SIZE 0xd10 -#define MV64340_PCI_1_DEVCS_1_BANK_SIZE 0xd90 -#define MV64340_PCI_0_DEVCS_2_BANK_SIZE 0xd18 -#define MV64340_PCI_1_DEVCS_2_BANK_SIZE 0xd98 -#define MV64340_PCI_0_DEVCS_3_BANK_SIZE 0xc14 -#define MV64340_PCI_1_DEVCS_3_BANK_SIZE 0xc94 -#define MV64340_PCI_0_DEVCS_BOOT_BANK_SIZE 0xd14 -#define MV64340_PCI_1_DEVCS_BOOT_BANK_SIZE 0xd94 -#define MV64340_PCI_0_P2P_MEM0_BAR_SIZE 0xd1c -#define MV64340_PCI_1_P2P_MEM0_BAR_SIZE 0xd9c -#define MV64340_PCI_0_P2P_MEM1_BAR_SIZE 0xd20 -#define MV64340_PCI_1_P2P_MEM1_BAR_SIZE 0xda0 -#define MV64340_PCI_0_P2P_I_O_BAR_SIZE 0xd24 -#define MV64340_PCI_1_P2P_I_O_BAR_SIZE 0xda4 -#define MV64340_PCI_0_CPU_BAR_SIZE 0xd28 -#define MV64340_PCI_1_CPU_BAR_SIZE 0xda8 -#define MV64340_PCI_0_INTERNAL_SRAM_BAR_SIZE 0xe00 -#define MV64340_PCI_1_INTERNAL_SRAM_BAR_SIZE 0xe80 -#define MV64340_PCI_0_EXPANSION_ROM_BAR_SIZE 0xd2c -#define MV64340_PCI_1_EXPANSION_ROM_BAR_SIZE 0xd9c -#define MV64340_PCI_0_BASE_ADDR_REG_ENABLE 0xc3c -#define MV64340_PCI_1_BASE_ADDR_REG_ENABLE 0xcbc -#define MV64340_PCI_0_CS_0_BASE_ADDR_REMAP 0xc48 -#define MV64340_PCI_1_CS_0_BASE_ADDR_REMAP 0xcc8 -#define MV64340_PCI_0_CS_1_BASE_ADDR_REMAP 0xd48 -#define MV64340_PCI_1_CS_1_BASE_ADDR_REMAP 0xdc8 -#define MV64340_PCI_0_CS_2_BASE_ADDR_REMAP 0xc4c -#define MV64340_PCI_1_CS_2_BASE_ADDR_REMAP 0xccc -#define MV64340_PCI_0_CS_3_BASE_ADDR_REMAP 0xd4c -#define MV64340_PCI_1_CS_3_BASE_ADDR_REMAP 0xdcc -#define MV64340_PCI_0_CS_0_BASE_HIGH_ADDR_REMAP 0xF04 -#define MV64340_PCI_1_CS_0_BASE_HIGH_ADDR_REMAP 0xF84 -#define MV64340_PCI_0_CS_1_BASE_HIGH_ADDR_REMAP 0xF08 -#define MV64340_PCI_1_CS_1_BASE_HIGH_ADDR_REMAP 0xF88 -#define MV64340_PCI_0_CS_2_BASE_HIGH_ADDR_REMAP 0xF0C -#define MV64340_PCI_1_CS_2_BASE_HIGH_ADDR_REMAP 0xF8C -#define MV64340_PCI_0_CS_3_BASE_HIGH_ADDR_REMAP 0xF10 -#define MV64340_PCI_1_CS_3_BASE_HIGH_ADDR_REMAP 0xF90 -#define MV64340_PCI_0_DEVCS_0_BASE_ADDR_REMAP 0xc50 -#define MV64340_PCI_1_DEVCS_0_BASE_ADDR_REMAP 0xcd0 -#define MV64340_PCI_0_DEVCS_1_BASE_ADDR_REMAP 0xd50 -#define MV64340_PCI_1_DEVCS_1_BASE_ADDR_REMAP 0xdd0 -#define MV64340_PCI_0_DEVCS_2_BASE_ADDR_REMAP 0xd58 -#define MV64340_PCI_1_DEVCS_2_BASE_ADDR_REMAP 0xdd8 -#define MV64340_PCI_0_DEVCS_3_BASE_ADDR_REMAP 0xc54 -#define MV64340_PCI_1_DEVCS_3_BASE_ADDR_REMAP 0xcd4 -#define MV64340_PCI_0_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xd54 -#define MV64340_PCI_1_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xdd4 -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xd5c -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xddc -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xd60 -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xde0 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xd64 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xde4 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xd68 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xde8 -#define MV64340_PCI_0_P2P_I_O_BASE_ADDR_REMAP 0xd6c -#define MV64340_PCI_1_P2P_I_O_BASE_ADDR_REMAP 0xdec -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_LOW 0xd70 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_LOW 0xdf0 -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_HIGH 0xd74 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_HIGH 0xdf4 -#define MV64340_PCI_0_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf00 -#define MV64340_PCI_1_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf80 -#define MV64340_PCI_0_EXPANSION_ROM_BASE_ADDR_REMAP 0xf38 -#define MV64340_PCI_1_EXPANSION_ROM_BASE_ADDR_REMAP 0xfb8 -#define MV64340_PCI_0_ADDR_DECODE_CONTROL 0xd3c -#define 
MV64340_PCI_1_ADDR_DECODE_CONTROL 0xdbc -#define MV64340_PCI_0_HEADERS_RETARGET_CONTROL 0xF40 -#define MV64340_PCI_1_HEADERS_RETARGET_CONTROL 0xFc0 -#define MV64340_PCI_0_HEADERS_RETARGET_BASE 0xF44 -#define MV64340_PCI_1_HEADERS_RETARGET_BASE 0xFc4 -#define MV64340_PCI_0_HEADERS_RETARGET_HIGH 0xF48 -#define MV64340_PCI_1_HEADERS_RETARGET_HIGH 0xFc8 - -/***********************************/ -/* PCI Control Register Map */ -/***********************************/ - -#define MV64340_PCI_0_DLL_STATUS_AND_COMMAND 0x1d20 -#define MV64340_PCI_1_DLL_STATUS_AND_COMMAND 0x1da0 -#define MV64340_PCI_0_MPP_PADS_DRIVE_CONTROL 0x1d1C -#define MV64340_PCI_1_MPP_PADS_DRIVE_CONTROL 0x1d9C -#define MV64340_PCI_0_COMMAND 0xc00 -#define MV64340_PCI_1_COMMAND 0xc80 -#define MV64340_PCI_0_MODE 0xd00 -#define MV64340_PCI_1_MODE 0xd80 -#define MV64340_PCI_0_RETRY 0xc04 -#define MV64340_PCI_1_RETRY 0xc84 -#define MV64340_PCI_0_READ_BUFFER_DISCARD_TIMER 0xd04 -#define MV64340_PCI_1_READ_BUFFER_DISCARD_TIMER 0xd84 -#define MV64340_PCI_0_MSI_TRIGGER_TIMER 0xc38 -#define MV64340_PCI_1_MSI_TRIGGER_TIMER 0xcb8 -#define MV64340_PCI_0_ARBITER_CONTROL 0x1d00 -#define MV64340_PCI_1_ARBITER_CONTROL 0x1d80 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_LOW 0x1d08 -#define MV64340_PCI_1_CROSS_BAR_CONTROL_LOW 0x1d88 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_HIGH 0x1d0c -#define MV64340_PCI_1_CROSS_BAR_CONTROL_HIGH 0x1d8c -#define MV64340_PCI_0_CROSS_BAR_TIMEOUT 0x1d04 -#define MV64340_PCI_1_CROSS_BAR_TIMEOUT 0x1d84 -#define MV64340_PCI_0_SYNC_BARRIER_TRIGGER_REG 0x1D18 -#define MV64340_PCI_1_SYNC_BARRIER_TRIGGER_REG 0x1D98 -#define MV64340_PCI_0_SYNC_BARRIER_VIRTUAL_REG 0x1d10 -#define MV64340_PCI_1_SYNC_BARRIER_VIRTUAL_REG 0x1d90 -#define MV64340_PCI_0_P2P_CONFIG 0x1d14 -#define MV64340_PCI_1_P2P_CONFIG 0x1d94 - -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_LOW 0x1e00 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_HIGH 0x1e04 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_0 0x1e08 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_LOW 0x1e10 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_HIGH 0x1e14 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_1 0x1e18 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_LOW 0x1e20 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_HIGH 0x1e24 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_2 0x1e28 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_LOW 0x1e30 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_HIGH 0x1e34 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_3 0x1e38 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_LOW 0x1e40 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_HIGH 0x1e44 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_4 0x1e48 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_LOW 0x1e50 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_HIGH 0x1e54 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_5 0x1e58 - -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_LOW 0x1e80 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_HIGH 0x1e84 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_0 0x1e88 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_LOW 0x1e90 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_HIGH 0x1e94 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_1 0x1e98 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_LOW 0x1ea0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_HIGH 0x1ea4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_2 0x1ea8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_LOW 0x1eb0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_HIGH 0x1eb4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_3 0x1eb8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_LOW 0x1ec0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_HIGH 
0x1ec4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_4 0x1ec8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_LOW 0x1ed0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_HIGH 0x1ed4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_5 0x1ed8 - -/****************************************/ -/* PCI Configuration Access Registers */ -/****************************************/ - -#define MV64340_PCI_0_CONFIG_ADDR 0xcf8 -#define MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG 0xcfc -#define MV64340_PCI_1_CONFIG_ADDR 0xc78 -#define MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG 0xc7c -#define MV64340_PCI_0_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xc34 -#define MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xcb4 - -/****************************************/ -/* PCI Error Report Registers */ -/****************************************/ - -#define MV64340_PCI_0_SERR_MASK 0xc28 -#define MV64340_PCI_1_SERR_MASK 0xca8 -#define MV64340_PCI_0_ERROR_ADDR_LOW 0x1d40 -#define MV64340_PCI_1_ERROR_ADDR_LOW 0x1dc0 -#define MV64340_PCI_0_ERROR_ADDR_HIGH 0x1d44 -#define MV64340_PCI_1_ERROR_ADDR_HIGH 0x1dc4 -#define MV64340_PCI_0_ERROR_ATTRIBUTE 0x1d48 -#define MV64340_PCI_1_ERROR_ATTRIBUTE 0x1dc8 -#define MV64340_PCI_0_ERROR_COMMAND 0x1d50 -#define MV64340_PCI_1_ERROR_COMMAND 0x1dd0 -#define MV64340_PCI_0_ERROR_CAUSE 0x1d58 -#define MV64340_PCI_1_ERROR_CAUSE 0x1dd8 -#define MV64340_PCI_0_ERROR_MASK 0x1d5c -#define MV64340_PCI_1_ERROR_MASK 0x1ddc - -/****************************************/ -/* PCI Debug Registers */ -/****************************************/ - -#define MV64340_PCI_0_MMASK 0X1D24 -#define MV64340_PCI_1_MMASK 0X1DA4 - -/*********************************************/ -/* PCI Configuration, Function 0, Registers */ -/*********************************************/ - -#define MV64340_PCI_DEVICE_AND_VENDOR_ID 0x000 -#define MV64340_PCI_STATUS_AND_COMMAND 0x004 -#define MV64340_PCI_CLASS_CODE_AND_REVISION_ID 0x008 -#define MV64340_PCI_BIST_HEADER_TYPE_LATENCY_TIMER_CACHE_LINE 0x00C - -#define MV64340_PCI_SCS_0_BASE_ADDR_LOW 0x010 -#define MV64340_PCI_SCS_0_BASE_ADDR_HIGH 0x014 -#define MV64340_PCI_SCS_1_BASE_ADDR_LOW 0x018 -#define MV64340_PCI_SCS_1_BASE_ADDR_HIGH 0x01C -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_LOW 0x020 -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_HIGH 0x024 -#define MV64340_PCI_SUBSYSTEM_ID_AND_SUBSYSTEM_VENDOR_ID 0x02c -#define MV64340_PCI_EXPANSION_ROM_BASE_ADDR_REG 0x030 -#define MV64340_PCI_CAPABILTY_LIST_POINTER 0x034 -#define MV64340_PCI_INTERRUPT_PIN_AND_LINE 0x03C - /* capability list */ -#define MV64340_PCI_POWER_MANAGEMENT_CAPABILITY 0x040 -#define MV64340_PCI_POWER_MANAGEMENT_STATUS_AND_CONTROL 0x044 -#define MV64340_PCI_VPD_ADDR 0x048 -#define MV64340_PCI_VPD_DATA 0x04c -#define MV64340_PCI_MSI_MESSAGE_CONTROL 0x050 -#define MV64340_PCI_MSI_MESSAGE_ADDR 0x054 -#define MV64340_PCI_MSI_MESSAGE_UPPER_ADDR 0x058 -#define MV64340_PCI_MSI_MESSAGE_DATA 0x05c -#define MV64340_PCI_X_COMMAND 0x060 -#define MV64340_PCI_X_STATUS 0x064 -#define MV64340_PCI_COMPACT_PCI_HOT_SWAP 0x068 - -/***********************************************/ -/* PCI Configuration, Function 1, Registers */ -/***********************************************/ - -#define MV64340_PCI_SCS_2_BASE_ADDR_LOW 0x110 -#define MV64340_PCI_SCS_2_BASE_ADDR_HIGH 0x114 -#define MV64340_PCI_SCS_3_BASE_ADDR_LOW 0x118 -#define MV64340_PCI_SCS_3_BASE_ADDR_HIGH 0x11c -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_LOW 0x120 -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_HIGH 0x124 - -/***********************************************/ -/* PCI Configuration, 
Function 2, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_0_BASE_ADDR_LOW 0x210 -#define MV64340_PCI_DEVCS_0_BASE_ADDR_HIGH 0x214 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_LOW 0x218 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_HIGH 0x21c -#define MV64340_PCI_DEVCS_2_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_DEVCS_2_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 3, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_3_BASE_ADDR_LOW 0x310 -#define MV64340_PCI_DEVCS_3_BASE_ADDR_HIGH 0x314 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_LOW 0x318 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_HIGH 0x31c -#define MV64340_PCI_CPU_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_CPU_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 4, Registers */ -/***********************************************/ - -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_LOW 0x410 -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_HIGH 0x414 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_LOW 0x418 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_HIGH 0x41c -#define MV64340_PCI_P2P_I_O_BASE_ADDR 0x420 -#define MV64340_PCI_INTERNAL_REGS_I_O_MAPPED_BASE_ADDR 0x424 - -/****************************************/ -/* Messaging Unit Registers (I20) */ -/****************************************/ - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_0_SIDE 0x010 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_0_SIDE 0x014 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_0_SIDE 0x018 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_0_SIDE 0x01C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_0_SIDE 0x020 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x024 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x028 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_0_SIDE 0x02C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x030 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x034 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x040 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x044 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_0_SIDE 0x050 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_0_SIDE 0x054 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x060 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x064 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x068 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x06C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x070 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x074 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x0F8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x0FC - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_1_SIDE 0x090 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_1_SIDE 0x094 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_1_SIDE 0x098 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_1_SIDE 0x09C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_1_SIDE 0x0A0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0A4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0A8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_1_SIDE 0x0AC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0B0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0B4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C0 -#define 
MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C4 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_1_SIDE 0x0D0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_1_SIDE 0x0D4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0E0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0E4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x0E8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x0EC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0F0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0F4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x078 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x07C - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C10 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C14 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C18 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C1C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU0_SIDE 0x1C20 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C24 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C28 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU0_SIDE 0x1C2C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C30 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C34 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C40 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C44 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU0_SIDE 0x1C50 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU0_SIDE 0x1C54 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C60 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C64 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1C68 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1C6C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C70 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C74 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1CF8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1CFC -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C90 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C94 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C98 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C9C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU1_SIDE 0x1CA0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CA4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CA8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU1_SIDE 0x1CAC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CB0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CB4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC0 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC4 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU1_SIDE 0x1CD0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU1_SIDE 0x1CD4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CE0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CE4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1CE8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1CEC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CF0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CF4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1C78 -#define 
MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1C7C - -/****************************************/ -/* Ethernet Unit Registers */ -/****************************************/ - -/*******************************************/ -/* CUNIT Registers */ -/*******************************************/ - - /* Address Decoding Register Map */ - -#define MV64340_CUNIT_BASE_ADDR_REG0 0xf200 -#define MV64340_CUNIT_BASE_ADDR_REG1 0xf208 -#define MV64340_CUNIT_BASE_ADDR_REG2 0xf210 -#define MV64340_CUNIT_BASE_ADDR_REG3 0xf218 -#define MV64340_CUNIT_SIZE0 0xf204 -#define MV64340_CUNIT_SIZE1 0xf20c -#define MV64340_CUNIT_SIZE2 0xf214 -#define MV64340_CUNIT_SIZE3 0xf21c -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG0 0xf240 -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG1 0xf244 -#define MV64340_CUNIT_BASE_ADDR_ENABLE_REG 0xf250 -#define MV64340_MPSC0_ACCESS_PROTECTION_REG 0xf254 -#define MV64340_MPSC1_ACCESS_PROTECTION_REG 0xf258 -#define MV64340_CUNIT_INTERNAL_SPACE_BASE_ADDR_REG 0xf25C - - /* Error Report Registers */ - -#define MV64340_CUNIT_INTERRUPT_CAUSE_REG 0xf310 -#define MV64340_CUNIT_INTERRUPT_MASK_REG 0xf314 -#define MV64340_CUNIT_ERROR_ADDR 0xf318 - - /* Cunit Control Registers */ - -#define MV64340_CUNIT_ARBITER_CONTROL_REG 0xf300 -#define MV64340_CUNIT_CONFIG_REG 0xb40c -#define MV64340_CUNIT_CRROSBAR_TIMEOUT_REG 0xf304 - - /* Cunit Debug Registers */ - -#define MV64340_CUNIT_DEBUG_LOW 0xf340 -#define MV64340_CUNIT_DEBUG_HIGH 0xf344 -#define MV64340_CUNIT_MMASK 0xf380 - - /* MPSCs Clocks Routing Registers */ - -#define MV64340_MPSC_ROUTING_REG 0xb400 -#define MV64340_MPSC_RX_CLOCK_ROUTING_REG 0xb404 -#define MV64340_MPSC_TX_CLOCK_ROUTING_REG 0xb408 - - /* MPSCs Interrupts Registers */ - -#define MV64340_MPSC_CAUSE_REG(port) (0xb804 + (port<<3)) -#define MV64340_MPSC_MASK_REG(port) (0xb884 + (port<<3)) - -#define MV64340_MPSC_MAIN_CONFIG_LOW(port) (0x8000 + (port<<12)) -#define MV64340_MPSC_MAIN_CONFIG_HIGH(port) (0x8004 + (port<<12)) -#define MV64340_MPSC_PROTOCOL_CONFIG(port) (0x8008 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG1(port) (0x800c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG2(port) (0x8010 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG3(port) (0x8014 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG4(port) (0x8018 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG5(port) (0x801c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG6(port) (0x8020 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG7(port) (0x8024 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG8(port) (0x8028 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG9(port) (0x802c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG10(port) (0x8030 + (port<<12)) - - /* MPSC0 Registers */ - - -/***************************************/ -/* SDMA Registers */ -/***************************************/ - -#define MV64340_SDMA_CONFIG_REG(channel) (0x4000 + (channel<<13)) -#define MV64340_SDMA_COMMAND_REG(channel) (0x4008 + (channel<<13)) -#define MV64340_SDMA_CURRENT_RX_DESCRIPTOR_POINTER(channel) (0x4810 + (channel<<13)) -#define MV64340_SDMA_CURRENT_TX_DESCRIPTOR_POINTER(channel) (0x4c10 + (channel<<13)) -#define MV64340_SDMA_FIRST_TX_DESCRIPTOR_POINTER(channel) (0x4c14 + (channel<<13)) - -#define MV64340_SDMA_CAUSE_REG 0xb800 -#define MV64340_SDMA_MASK_REG 0xb880 - -/* BRG Interrupts */ - -#define MV64340_BRG_CONFIG_REG(brg) (0xb200 + (brg<<3)) -#define MV64340_BRG_BAUDE_TUNING_REG(brg) (0xb208 + (brg<<3)) -#define MV64340_BRG_CAUSE_REG 0xb834 -#define MV64340_BRG_MASK_REG 0xb8b4 - -/****************************************/ -/* DMA Channel 
Control */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_CONTROL 0x840 -#define MV64340_DMA_CHANNEL0_CONTROL_HIGH 0x880 -#define MV64340_DMA_CHANNEL1_CONTROL 0x844 -#define MV64340_DMA_CHANNEL1_CONTROL_HIGH 0x884 -#define MV64340_DMA_CHANNEL2_CONTROL 0x848 -#define MV64340_DMA_CHANNEL2_CONTROL_HIGH 0x888 -#define MV64340_DMA_CHANNEL3_CONTROL 0x84C -#define MV64340_DMA_CHANNEL3_CONTROL_HIGH 0x88C - - -/****************************************/ -/* IDMA Registers */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_BYTE_COUNT 0x800 -#define MV64340_DMA_CHANNEL1_BYTE_COUNT 0x804 -#define MV64340_DMA_CHANNEL2_BYTE_COUNT 0x808 -#define MV64340_DMA_CHANNEL3_BYTE_COUNT 0x80C -#define MV64340_DMA_CHANNEL0_SOURCE_ADDR 0x810 -#define MV64340_DMA_CHANNEL1_SOURCE_ADDR 0x814 -#define MV64340_DMA_CHANNEL2_SOURCE_ADDR 0x818 -#define MV64340_DMA_CHANNEL3_SOURCE_ADDR 0x81c -#define MV64340_DMA_CHANNEL0_DESTINATION_ADDR 0x820 -#define MV64340_DMA_CHANNEL1_DESTINATION_ADDR 0x824 -#define MV64340_DMA_CHANNEL2_DESTINATION_ADDR 0x828 -#define MV64340_DMA_CHANNEL3_DESTINATION_ADDR 0x82C -#define MV64340_DMA_CHANNEL0_NEXT_DESCRIPTOR_POINTER 0x830 -#define MV64340_DMA_CHANNEL1_NEXT_DESCRIPTOR_POINTER 0x834 -#define MV64340_DMA_CHANNEL2_NEXT_DESCRIPTOR_POINTER 0x838 -#define MV64340_DMA_CHANNEL3_NEXT_DESCRIPTOR_POINTER 0x83C -#define MV64340_DMA_CHANNEL0_CURRENT_DESCRIPTOR_POINTER 0x870 -#define MV64340_DMA_CHANNEL1_CURRENT_DESCRIPTOR_POINTER 0x874 -#define MV64340_DMA_CHANNEL2_CURRENT_DESCRIPTOR_POINTER 0x878 -#define MV64340_DMA_CHANNEL3_CURRENT_DESCRIPTOR_POINTER 0x87C - - /* IDMA Address Decoding Base Address Registers */ - -#define MV64340_DMA_BASE_ADDR_REG0 0xa00 -#define MV64340_DMA_BASE_ADDR_REG1 0xa08 -#define MV64340_DMA_BASE_ADDR_REG2 0xa10 -#define MV64340_DMA_BASE_ADDR_REG3 0xa18 -#define MV64340_DMA_BASE_ADDR_REG4 0xa20 -#define MV64340_DMA_BASE_ADDR_REG5 0xa28 -#define MV64340_DMA_BASE_ADDR_REG6 0xa30 -#define MV64340_DMA_BASE_ADDR_REG7 0xa38 - - /* IDMA Address Decoding Size Address Register */ - -#define MV64340_DMA_SIZE_REG0 0xa04 -#define MV64340_DMA_SIZE_REG1 0xa0c -#define MV64340_DMA_SIZE_REG2 0xa14 -#define MV64340_DMA_SIZE_REG3 0xa1c -#define MV64340_DMA_SIZE_REG4 0xa24 -#define MV64340_DMA_SIZE_REG5 0xa2c -#define MV64340_DMA_SIZE_REG6 0xa34 -#define MV64340_DMA_SIZE_REG7 0xa3C - - /* IDMA Address Decoding High Address Remap and Access - Protection Registers */ - -#define MV64340_DMA_HIGH_ADDR_REMAP_REG0 0xa60 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG1 0xa64 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG2 0xa68 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG3 0xa6C -#define MV64340_DMA_BASE_ADDR_ENABLE_REG 0xa80 -#define MV64340_DMA_CHANNEL0_ACCESS_PROTECTION_REG 0xa70 -#define MV64340_DMA_CHANNEL1_ACCESS_PROTECTION_REG 0xa74 -#define MV64340_DMA_CHANNEL2_ACCESS_PROTECTION_REG 0xa78 -#define MV64340_DMA_CHANNEL3_ACCESS_PROTECTION_REG 0xa7c -#define MV64340_DMA_ARBITER_CONTROL 0x860 -#define MV64340_DMA_CROSS_BAR_TIMEOUT 0x8d0 - - /* IDMA Headers Retarget Registers */ - -#define MV64340_DMA_HEADERS_RETARGET_CONTROL 0xa84 -#define MV64340_DMA_HEADERS_RETARGET_BASE 0xa88 - - /* IDMA Interrupt Register */ - -#define MV64340_DMA_INTERRUPT_CAUSE_REG 0x8c0 -#define MV64340_DMA_INTERRUPT_CAUSE_MASK 0x8c4 -#define MV64340_DMA_ERROR_ADDR 0x8c8 -#define MV64340_DMA_ERROR_SELECT 0x8cc - - /* IDMA Debug Register ( for internal use ) */ - -#define MV64340_DMA_DEBUG_LOW 0x8e0 -#define MV64340_DMA_DEBUG_HIGH 0x8e4 -#define MV64340_DMA_SPARE 0xA8C - 
-/****************************************/ -/* Timer_Counter */ -/****************************************/ - -#define MV64340_TIMER_COUNTER0 0x850 -#define MV64340_TIMER_COUNTER1 0x854 -#define MV64340_TIMER_COUNTER2 0x858 -#define MV64340_TIMER_COUNTER3 0x85C -#define MV64340_TIMER_COUNTER_0_3_CONTROL 0x864 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_CAUSE 0x868 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_MASK 0x86c - -/****************************************/ -/* Watchdog registers */ -/****************************************/ - -#define MV64340_WATCHDOG_CONFIG_REG 0xb410 -#define MV64340_WATCHDOG_VALUE_REG 0xb414 - -/****************************************/ -/* I2C Registers */ -/****************************************/ - -#define MV64XXX_I2C_OFFSET 0xc000 -#define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 - -/****************************************/ -/* GPP Interface Registers */ -/****************************************/ - -#define MV64340_GPP_IO_CONTROL 0xf100 -#define MV64340_GPP_LEVEL_CONTROL 0xf110 -#define MV64340_GPP_VALUE 0xf104 -#define MV64340_GPP_INTERRUPT_CAUSE 0xf108 -#define MV64340_GPP_INTERRUPT_MASK0 0xf10c -#define MV64340_GPP_INTERRUPT_MASK1 0xf114 -#define MV64340_GPP_VALUE_SET 0xf118 -#define MV64340_GPP_VALUE_CLEAR 0xf11c - -/****************************************/ -/* Interrupt Controller Registers */ -/****************************************/ - -/****************************************/ -/* Interrupts */ -/****************************************/ - -#define MV64340_MAIN_INTERRUPT_CAUSE_LOW 0x004 -#define MV64340_MAIN_INTERRUPT_CAUSE_HIGH 0x00c -#define MV64340_CPU_INTERRUPT0_MASK_LOW 0x014 -#define MV64340_CPU_INTERRUPT0_MASK_HIGH 0x01c -#define MV64340_CPU_INTERRUPT0_SELECT_CAUSE 0x024 -#define MV64340_CPU_INTERRUPT1_MASK_LOW 0x034 -#define MV64340_CPU_INTERRUPT1_MASK_HIGH 0x03c -#define MV64340_CPU_INTERRUPT1_SELECT_CAUSE 0x044 -#define MV64340_INTERRUPT0_MASK_0_LOW 0x054 -#define MV64340_INTERRUPT0_MASK_0_HIGH 0x05c -#define MV64340_INTERRUPT0_SELECT_CAUSE 0x064 -#define MV64340_INTERRUPT1_MASK_0_LOW 0x074 -#define MV64340_INTERRUPT1_MASK_0_HIGH 0x07c -#define MV64340_INTERRUPT1_SELECT_CAUSE 0x084 - -/****************************************/ -/* MPP Interface Registers */ -/****************************************/ - -#define MV64340_MPP_CONTROL0 0xf000 -#define MV64340_MPP_CONTROL1 0xf004 -#define MV64340_MPP_CONTROL2 0xf008 -#define MV64340_MPP_CONTROL3 0xf00c - -/****************************************/ -/* Serial Initialization registers */ -/****************************************/ - -#define MV64340_SERIAL_INIT_LAST_DATA 0xf324 -#define MV64340_SERIAL_INIT_CONTROL 0xf328 -#define MV64340_SERIAL_INIT_STATUS 0xf32c - -extern void mv64340_irq_init(unsigned int base); - -#endif /* __ASM_MV643XX_H */ -- cgit v1.2.3 From 636119af94f2fbf3e4458be66a1bc740ba69ce6d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 14 Sep 2024 08:51:15 -0600 Subject: io_uring: rename "copy buffers" to "clone buffers" A recent commit added support for copying registered buffers from one ring to another. But that term is a bit confusing, as no copying of buffer data is done here. What is being done is simply cloning the buffer registrations from one ring to another. Rename it while we still can, so that it's more descriptive. No functional changes in this patch. 
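For reference, a minimal user-space sketch of the renamed interface (raw syscall shown; descriptor names are illustrative and error handling is omitted):

	#include <linux/io_uring.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Clone the buffer registrations of src_ring_fd into dst_ring_fd. */
	static int clone_buffers(int dst_ring_fd, int src_ring_fd)
	{
		struct io_uring_clone_buffers arg = {
			.src_fd = src_ring_fd,
		};

		/* Only the registrations are cloned; no buffer data is copied. */
		return syscall(__NR_io_uring_register, dst_ring_fd,
			       IORING_REGISTER_CLONE_BUFFERS, &arg, 1);
	}

The struct layout (src_fd, flags, padding) is unchanged by the rename; only the opcode and type names differ.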
Fixes: 7cc2a6eadcd7 ("io_uring: add IORING_REGISTER_COPY_BUFFERS method") Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9dc5bb428c8a..1fe79e750470 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -609,8 +609,8 @@ enum io_uring_register_op { IORING_REGISTER_CLOCK = 29, - /* copy registered buffers from source ring to current ring */ - IORING_REGISTER_COPY_BUFFERS = 30, + /* clone registered buffers from source ring to current ring */ + IORING_REGISTER_CLONE_BUFFERS = 30, /* this goes last */ IORING_REGISTER_LAST, @@ -701,7 +701,7 @@ enum { IORING_REGISTER_SRC_REGISTERED = 1, }; -struct io_uring_copy_buffers { +struct io_uring_clone_buffers { __u32 src_fd; __u32 flags; __u32 pad[6]; -- cgit v1.2.3 From 21d52e295ad2afc76bbd105da82a003b96f6ac77 Mon Sep 17 00:00:00 2001 From: Tahera Fahimi Date: Wed, 4 Sep 2024 18:13:55 -0600 Subject: landlock: Add abstract UNIX socket scoping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new "scoped" member to landlock_ruleset_attr that can specify LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET to restrict connection to abstract UNIX sockets from a process outside of the socket's domain. Two hooks are implemented to enforce these restrictions: unix_stream_connect and unix_may_send. Closes: https://github.com/landlock-lsm/linux/issues/7 Signed-off-by: Tahera Fahimi Link: https://lore.kernel.org/r/5f7ad85243b78427242275b93481cfc7c127764b.1725494372.git.fahimitahera@gmail.com [mic: Fix commit message formatting, improve documentation, simplify hook_unix_may_send(), and cosmetic fixes including rename of LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET] Co-developed-by: Mickaël Salaün Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 2c8dbc74b955..70edd17bafdc 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -44,6 +44,12 @@ struct landlock_ruleset_attr { * flags`_). */ __u64 handled_access_net; + /** + * @scoped: Bitmask of scopes (cf. `Scope flags`_) + * restricting a Landlock domain from accessing outside + * resources (e.g. IPCs). + */ + __u64 scoped; }; /* @@ -274,4 +280,25 @@ struct landlock_net_port_attr { #define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0) #define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1) /* clang-format on */ + +/** + * DOC: scope + * + * Scope flags + * ~~~~~~~~~~~ + * + * These flags enable to isolate a sandboxed process from a set of IPC actions. + * Setting a flag for a ruleset will isolate the Landlock domain to forbid + * connections to resources outside the domain. + * + * Scopes: + * + * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from + * connecting to an abstract UNIX socket created by a process outside the + * related Landlock domain (e.g. a parent domain or a non-sandboxed process). 
+ */ +/* clang-format off */ +#define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET (1ULL << 0) +/* clang-format on*/ + #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From 54a6e6bbf3bef25c8eb65619edde70af49bd3db0 Mon Sep 17 00:00:00 2001 From: Tahera Fahimi Date: Fri, 6 Sep 2024 15:30:03 -0600 Subject: landlock: Add signal scoping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, a sandbox process is not restricted to sending a signal (e.g. SIGKILL) to a process outside the sandbox environment. The ability to send a signal for a sandboxed process should be scoped the same way abstract UNIX sockets are scoped. Therefore, we extend the "scoped" field in a ruleset with LANDLOCK_SCOPE_SIGNAL to specify that a ruleset will deny sending any signal from within a sandbox process to its parent (i.e. any parent sandbox or non-sandboxed processes). This patch adds file_set_fowner and file_free_security hooks to set and release a pointer to the file owner's domain. This pointer, fown_domain in landlock_file_security will be used in file_send_sigiotask to check if the process can send a signal. The ruleset_with_unknown_scope test is updated to support LANDLOCK_SCOPE_SIGNAL. This depends on two new changes: - commit 1934b212615d ("file: reclaim 24 bytes from f_owner"): replace container_of(fown, struct file, f_owner) with fown->file . - commit 26f204380a3c ("fs: Fix file_set_fowner LSM hook inconsistencies"): lock before calling the hook. Signed-off-by: Tahera Fahimi Closes: https://github.com/landlock-lsm/linux/issues/8 Link: https://lore.kernel.org/r/df2b4f880a2ed3042992689a793ea0951f6798a5.1725657727.git.fahimitahera@gmail.com [mic: Update landlock_get_current_domain()'s return type, improve and fix locking in hook_file_set_fowner(), simplify and fix sleepable call and locking issue in hook_file_send_sigiotask() and rebase on the latest VFS tree, simplify hook_task_kill() and quickly return when not sandboxed, improve comments, rename LANDLOCK_SCOPED_SIGNAL] Co-developed-by: Mickaël Salaün Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 70edd17bafdc..33745642f787 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -296,9 +296,12 @@ struct landlock_net_port_attr { * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from * connecting to an abstract UNIX socket created by a process outside the * related Landlock domain (e.g. a parent domain or a non-sandboxed process). + * - %LANDLOCK_SCOPE_SIGNAL: Restrict a sandboxed process from sending a signal + * to another process outside the domain. */ /* clang-format off */ #define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET (1ULL << 0) +#define LANDLOCK_SCOPE_SIGNAL (1ULL << 1) /* clang-format on*/ #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From 4208c562a27899212e8046080555e0f204e0579a Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 17 Sep 2024 10:24:57 +0530 Subject: block: remove bogus union The union around bi_integrity field is pointless. Remove it. 
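A union with a single member cannot change the size or the offsets of the surrounding structure, so dropping it is purely cosmetic; a stand-alone sketch (not kernel code) that demonstrates the point:

  #include <assert.h>
  #include <stddef.h>

  struct with_union {
          union {
                  void *bi_integrity;
          };
          unsigned short bi_vcnt;
  };

  struct without_union {
          void *bi_integrity;
          unsigned short bi_vcnt;
  };

  /* Layout is identical either way. */
  static_assert(sizeof(struct with_union) == sizeof(struct without_union),
                "single-member union adds nothing");
  static_assert(offsetof(struct with_union, bi_vcnt) ==
                offsetof(struct without_union, bi_vcnt),
                "member offsets are unchanged");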
Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20240917045457.429698-1-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 36ed96133217..6d3a0fff2a1d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -248,11 +248,9 @@ struct bio { struct bio_crypt_ctx *bi_crypt_context; #endif - union { #if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ + struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif - }; unsigned short bi_vcnt; /* how many bio_vec's */ -- cgit v1.2.3 From 6857be5fecaebd9773ff27b6d29b6fff3b1abbce Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:35 -0400 Subject: mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud Patch series "mm: Support huge pfnmaps", v2. Overview ======== This series implements huge pfnmaps support for mm in general. Huge pfnmap allows e.g. VM_PFNMAP vmas to map in either PMD or PUD levels, similar to what we do with dax / thp / hugetlb so far to benefit from TLB hits. Now we extend that idea to PFN mappings, e.g. PCI MMIO bars where it can grow as large as 8GB or even bigger. Currently, only x86_64 (1G+2M) and arm64 (2M) are supported. The last patch (from Alex Williamson) will be the first user of huge pfnmap, so as to enable vfio-pci driver to fault in huge pfn mappings. Implementation ============== In reality, it's relatively simple to add such support comparing to many other types of mappings, because of PFNMAP's specialties when there's no vmemmap backing it, so that most of the kernel routines on huge mappings should simply already fail for them, like GUPs or old-school follow_page() (which is recently rewritten to be folio_walk* APIs by David). One trick here is that we're still unmature on PUDs in generic paths here and there, as DAX is so far the only user. This patchset will add the 2nd user of it. Hugetlb can be a 3rd user if the hugetlb unification work can go on smoothly, but to be discussed later. The other trick is how to allow gup-fast working for such huge mappings even if there's no direct sign of knowing whether it's a normal page or MMIO mapping. This series chose to keep the pte_special solution, so that it reuses similar idea on setting a special bit to pfnmap PMDs/PUDs so that gup-fast will be able to identify them and fail properly. Along the way, we'll also notice that the major pgtable pfn walker, aka, follow_pte(), will need to retire soon due to the fact that it only works with ptes. A new set of simple API is introduced (follow_pfnmap* API) to be able to do whatever follow_pte() can already do, plus that it can also process huge pfnmaps now. Half of this series is about that and converting all existing pfnmap walkers to use the new API properly. Hopefully the new API also looks better to avoid exposing e.g. pgtable lock details into the callers, so that it can be used in an even more straightforward way. Here, three more options will be introduced and involved in huge pfnmap: - ARCH_SUPPORTS_HUGE_PFNMAP Arch developers will need to select this option when huge pfnmap is supported in arch's Kconfig. After this patchset applied, both x86_64 and arm64 will start to enable it by default. 
- ARCH_SUPPORTS_PMD_PFNMAP / ARCH_SUPPORTS_PUD_PFNMAP These options are for driver developers to identify whether current arch / config supports huge pfnmaps, making decision on whether it can use the huge pfnmap APIs to inject them. One can refer to the last vfio-pci patch from Alex on the use of them properly in a device driver. So after the whole set applied, and if one would enable some dynamic debug lines in vfio-pci core files, we should observe things like: vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x0: 0x100 vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x200: 0x100 vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x400: 0x100 In this specific case, it says that vfio-pci faults in PMDs properly for a few BAR0 offsets. Patch Layout ============ Patch 1: Introduce the new options mentioned above for huge PFNMAPs Patch 2: A tiny cleanup Patch 3-8: Preparation patches for huge pfnmap (include introduce special bit for pmd/pud) Patch 9-16: Introduce follow_pfnmap*() API, use it everywhere, and then drop follow_pte() API Patch 17: Add huge pfnmap support for x86_64 Patch 18: Add huge pfnmap support for arm64 Patch 19: Add vfio-pci support for all kinds of huge pfnmaps (Alex) TODO ==== More architectures / More page sizes ------------------------------------ Currently only x86_64 (2M+1G) and arm64 (2M) are supported. There seems to have plan to support arm64 1G later on top of this series [2]. Any arch will need to first support THP / THP_1G, then provide a special bit in pmds/puds to support huge pfnmaps. remap_pfn_range() support ------------------------- Currently, remap_pfn_range() still only maps PTEs. With the new option, remap_pfn_range() can logically start to inject either PMDs or PUDs when the alignment requirements match on the VAs. When the support is there, it should be able to silently benefit all drivers that is using remap_pfn_range() in its mmap() handler on better TLB hit rate and overall faster MMIO accesses similar to processor on hugepages. More driver support ------------------- VFIO is so far the only consumer for the huge pfnmaps after this series applied. Besides above remap_pfn_range() generic optimization, device driver can also try to optimize its mmap() on a better VA alignment for either PMD/PUD sizes. This may, iiuc, normally require userspace changes, as the driver doesn't normally decide the VA to map a bar. But I don't think I know all the drivers to know the full picture. Credits all go to Alex on help testing the GPU/NIC use cases above. [0] https://lore.kernel.org/r/73ad9540-3fb8-4154-9a4f-30a0a2b03d41@lucifer.local [1] https://lore.kernel.org/r/20240807194812.819412-1-peterx@redhat.com [2] https://lore.kernel.org/r/498e0731-81a4-4f75-95b4-a8ad0bcc7665@huawei.com This patch (of 19): This patch introduces the option to introduce special pte bit into pmd/puds. Archs can start to define pmd_special / pud_special when supported by selecting the new option. Per-arch support will be added later. Before that, create fallbacks for these helpers so that they are always available. 
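For a rough idea of what the later per-arch enablement looks like (a sketch of the x86-64 side, shown only as an assumption about the follow-up patches), an architecture that selects ARCH_SUPPORTS_PMD_PFNMAP supplies the real helpers on top of its existing special software bit, while the generic fallbacks added by this patch keep the build working everywhere else:

  /* arch/x86/include/asm/pgtable.h (sketch) */
  static inline bool pmd_special(pmd_t pmd)
  {
          return pmd_flags(pmd) & _PAGE_SPECIAL;
  }

  static inline pmd_t pmd_mkspecial(pmd_t pmd)
  {
          return pmd_set_flags(pmd, _PAGE_SPECIAL);
  }

Generic code can then test pmd_special()/pud_special() unconditionally; on architectures that do not select the options the fallbacks simply report false and hand the entry back unmodified.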
Link: https://lkml.kernel.org/r/20240826204353.2228736-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20240826204353.2228736-2-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 39f6faace590..c2307a2d275d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2643,6 +2643,30 @@ static inline pte_t pte_mkspecial(pte_t pte) } #endif +#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return false; +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return false; +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pud; +} +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { -- cgit v1.2.3 From ef713ec3a566d3e5e011c5d6201eb661ebf94c1f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:36 -0400 Subject: mm: drop is_huge_zero_pud() It constantly returns false since 2017. One assertion is added in 2019 but it should never have triggered, IOW it means what is checked should be asserted instead. If it didn't exist for 7 years maybe it's good idea to remove it and only add it when it comes. 
Link: https://lkml.kernel.org/r/20240826204353.2228736-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Matthew Wilcox Cc: Aneesh Kumar K.V Cc: Alexander Gordeev Cc: Alex Williamson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e3896ebf0f94..ffca706bac81 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -433,11 +433,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); @@ -578,11 +573,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return false; } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - static inline void mm_put_huge_zero_folio(struct mm_struct *mm) { return; -- cgit v1.2.3 From 5dd40721f147e83733ad34848330913cb633046e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:38 -0400 Subject: mm: allow THP orders for PFNMAPs This enables PFNMAPs to be mapped at either pmd/pud layers. Generalize the dax case into vma_is_special_huge() so as to cover both. Meanwhile, rename the macro to THP_ORDERS_ALL_SPECIAL. Link: https://lkml.kernel.org/r/20240826204353.2228736-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Matthew Wilcox Cc: Gavin Shan Cc: Ryan Roberts Cc: Zi Yan Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ffca706bac81..0b0539f4ee1a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -76,9 +76,9 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; /* * Mask of all large folio orders supported for file THP. Folios in a DAX * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to - * it. + * it. Same to PFNMAPs where there's neither page* nor pagecache. */ -#define THP_ORDERS_ALL_FILE_DAX \ +#define THP_ORDERS_ALL_SPECIAL \ (BIT(PMD_ORDER) | BIT(PUD_ORDER)) #define THP_ORDERS_ALL_FILE_DEFAULT \ ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) @@ -87,7 +87,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; * Mask of all large folio orders supported for THP. 
*/ #define THP_ORDERS_ALL \ - (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ -- cgit v1.2.3 From 0515e022e167cfacf1fee092eb93aa9514e23c0a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:42 -0400 Subject: mm: always define pxx_pgprot() There're: - 8 archs (arc, arm64, include, mips, powerpc, s390, sh, x86) that support pte_pgprot(). - 2 archs (x86, sparc) that support pmd_pgprot(). - 1 arch (x86) that support pud_pgprot(). Always define them to be used in generic code, and then we don't need to fiddle with "#ifdef"s when doing so. Link: https://lkml.kernel.org/r/20240826204353.2228736-9-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 780f3b439d98..e8b2ac6bd2ae 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1956,6 +1956,18 @@ typedef unsigned int pgtbl_mod_mask; #define MAX_PTRS_PER_P4D PTRS_PER_P4D #endif +#ifndef pte_pgprot +#define pte_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pmd_pgprot +#define pmd_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pud_pgprot +#define pud_pgprot(x) ((pgprot_t) {0}) +#endif + /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: -- cgit v1.2.3 From 6da8e9634bb7e3fdad9ae0e4db873a05036c4343 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:43 -0400 Subject: mm: new follow_pfnmap API Introduce a pair of APIs to follow pfn mappings to get entry information. It's very similar to what follow_pte() does before, but different in that it recognizes huge pfn mappings. 
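A sketch of the intended calling pattern (the surrounding driver logic, the locking comment and the error values are illustrative assumptions):

  struct follow_pfnmap_args args = {
          .vma     = vma,
          .address = addr,
  };
  unsigned long pfn;

  /* Caller holds the mmap lock, as it did for follow_pte(). */
  if (follow_pfnmap_start(&args))
          return -EFAULT;

  pfn = args.pfn;                 /* only stable until ..._end() */
  if (write && !args.writable) {
          follow_pfnmap_end(&args);
          return -EFAULT;
  }
  /* use pfn / args.pgprot while the walk is still "started" */
  follow_pfnmap_end(&args);

The output fields are only meaningful between the start and end calls, which also hide the page table lock details from the caller.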
Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c2307a2d275d..62bd89414897 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address, int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); +struct follow_pfnmap_args { + /** + * Inputs: + * @vma: Pointer to @vm_area_struct struct + * @address: the virtual address to walk + */ + struct vm_area_struct *vma; + unsigned long address; + /** + * Internals: + * + * The caller shouldn't touch any of these. + */ + spinlock_t *lock; + pte_t *ptep; + /** + * Outputs: + * + * @pfn: the PFN of the address + * @pgprot: the pgprot_t of the mapping + * @writable: whether the mapping is writable + * @special: whether the mapping is a special mapping (real PFN maps) + */ + unsigned long pfn; + pgprot_t pgprot; + bool writable; + bool special; +}; +int follow_pfnmap_start(struct follow_pfnmap_args *args); +void follow_pfnmap_end(struct follow_pfnmap_args *args); + extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); -- cgit v1.2.3 From b0a1c0d0edcd75a0f8ec5fd19dbd64b8d097f534 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 26 Aug 2024 16:43:50 -0400 Subject: mm: remove follow_pte() follow_pte() users have been converted to follow_pfnmap*(). Remove the API. 
Link: https://lkml.kernel.org/r/20240826204353.2228736-17-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alexander Gordeev Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Niklas Schnelle Cc: Paolo Bonzini Cc: Ryan Roberts Cc: Sean Christopherson Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 62bd89414897..d750be768121 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2368,8 +2368,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte(struct vm_area_struct *vma, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); -- cgit v1.2.3 From 82ce8e2f31a1eb05b1527c3d807bea40031df913 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Sep 2024 17:40:42 +0200 Subject: set_memory: add __must_check to generic stubs Following query shows that architectures that don't provide asm/set_memory.h don't use set_memory_...() functions. $ git grep set_memory_ alpha arc csky hexagon loongarch m68k microblaze mips nios2 openrisc parisc sh sparc um xtensa Following query shows that all core users of set_memory_...() functions always take returned value into account: $ git grep -w -e set_memory_ro -e set_memory_rw -e set_memory_x -e set_memory_nx -e set_memory_rox `find . -maxdepth 1 -type d | grep -v arch | grep /` set_memory_...() functions can fail, leaving the memory attributes unchanged. Make sure all callers check the returned code. 
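With __must_check in place, a conforming caller looks like the following sketch (the surrounding helper is hypothetical):

  static int harden_buffer(void *addr, int numpages)
  {
          int err;

          err = set_memory_ro((unsigned long)addr, numpages);
          if (err)
                  return err;     /* attributes left unchanged, propagate */

          return 0;
  }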
Link: https://github.com/KSPP/linux/issues/7 Link: https://lkml.kernel.org/r/6a89ffc69666de84721216947c6b6c7dcca39d7d.1725723347.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Arnd Bergmann Cc: Kees Cook Signed-off-by: Andrew Morton --- include/linux/set_memory.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 95ac8398ee72..e7aec20fb44f 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -8,10 +8,10 @@ #ifdef CONFIG_ARCH_HAS_SET_MEMORY #include #else -static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifndef set_memory_rox -- cgit v1.2.3 From 325efb16da2c840e165d9b620fec8049d4d664cc Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 9 Sep 2024 11:21:18 +1200 Subject: mm: add nr argument in mem_cgroup_swapin_uncharge_swap() helper to support large folios With large folios swap-in, we might need to uncharge multiple entries all together, add nr argument in mem_cgroup_swapin_uncharge_swap(). For the existing two users, just pass nr=1. Link: https://lkml.kernel.org/r/20240908232119.2157-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Chris Li Reviewed-by: Yosry Ahmed Cc: Shakeel Butt Cc: Baolin Wang Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Kairui Song Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Nhat Pham Cc: Ryan Roberts Cc: Sergey Senozhatsky Cc: Suren Baghdasaryan Cc: Yang Shi Cc: Chuanhua Han Cc: Kanchana P Sridhar Cc: Usama Arif Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2ef94c74847d..34d2da05f2f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -699,7 +699,8 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); + +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); void __mem_cgroup_uncharge(struct folio *folio); @@ -1206,7 +1207,7 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) { } -- cgit v1.2.3 From ed8d5b0ce1d738e13c60d6b1a901a56d832e5070 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2024 15:13:20 +0200 Subject: Revert "uprobes: use vm_special_mapping close() functionality" This reverts commit 08e28de1160a712724268fd33d77b32f1bc84d1c. 
A malicious application can munmap() its "[uprobes]" vma and in this case xol_mapping.close == uprobe_clear_state() will free the memory which can be used by another thread, or the same thread when it hits the uprobe bp afterwards. Link: https://lkml.kernel.org/r/20240911131320.GA3448@redhat.com Signed-off-by: Oleg Nesterov Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sven Schnelle Signed-off-by: Andrew Morton --- include/linux/uprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 493dc95d912c..b503fafb7fb3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -126,6 +126,7 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern void uprobe_clear_state(struct mm_struct *mm); extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -- cgit v1.2.3 From aef79e189ba2b32f78bd35daf2c0b41f3868a321 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Tue, 10 Sep 2024 13:16:25 +0800 Subject: i3c: master: support to adjust first broadcast address speed According to I3C spec 6.2 Timing Specification, the Open Drain High Period of SCL Clock timing for first broadcast address should be adjusted to 200ns at least. I3C device working as i2c device will see the broadcast to close its Spike Filter then change to work at I3C mode. After that I3C open drain SCL high level should be adjusted back. Signed-off-by: Carlos Song Reviewed-by: Miquel Raynal Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20240910051626.4052552-1-carlos.song@nxp.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 074f632868d9..2a1ed05d5782 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -277,6 +277,20 @@ enum i3c_bus_mode { I3C_BUS_MODE_MIXED_SLOW, }; +/** + * enum i3c_open_drain_speed - I3C open-drain speed + * @I3C_OPEN_DRAIN_SLOW_SPEED: Slow open-drain speed for sending the first + * broadcast address. The first broadcast address at this speed + * will be visible to all devices on the I3C bus. I3C devices + * working in I2C mode will turn off their spike filter when + * switching into I3C mode. + * @I3C_OPEN_DRAIN_NORMAL_SPEED: Normal open-drain speed in I3C bus mode. + */ +enum i3c_open_drain_speed { + I3C_OPEN_DRAIN_SLOW_SPEED, + I3C_OPEN_DRAIN_NORMAL_SPEED, +}; + /** * enum i3c_addr_slot_status - I3C address slot status * @I3C_ADDR_SLOT_FREE: address is free @@ -436,6 +450,7 @@ struct i3c_bus { * NULL. * @enable_hotjoin: enable hot join event detect. * @disable_hotjoin: disable hot join event detect. + * @set_speed: adjust I3C open drain mode timing. 
*/ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -464,6 +479,7 @@ struct i3c_master_controller_ops { struct i3c_ibi_slot *slot); int (*enable_hotjoin)(struct i3c_master_controller *master); int (*disable_hotjoin)(struct i3c_master_controller *master); + int (*set_speed)(struct i3c_master_controller *master, enum i3c_open_drain_speed speed); }; /** -- cgit v1.2.3 From f761fcdd289d07e8547fef7ac76c3760fc7803f2 Mon Sep 17 00:00:00 2001 From: Dongliang Cui Date: Wed, 18 Sep 2024 07:40:05 +0900 Subject: exfat: Implement sops->shutdown and ioctl We found that when writing a large file through buffer write, if the disk is inaccessible, exFAT does not return an error normally, which leads to the writing process not stopping properly. To easily reproduce this issue, you can follow the steps below: 1. format a device to exFAT and then mount (with a full disk erase) 2. dd if=/dev/zero of=/exfat_mount/test.img bs=1M count=8192 3. eject the device You may find that the dd process does not stop immediately and may continue for a long time. The root cause of this issue is that during buffer write process, exFAT does not need to access the disk to look up directory entries or the FAT table (whereas FAT would do) every time data is written. Instead, exFAT simply marks the buffer as dirty and returns, delegating the writeback operation to the writeback process. If the disk cannot be accessed at this time, the error will only be returned to the writeback process, and the original process will not receive the error, so it cannot be returned to the user side. When the disk cannot be accessed normally, an error should be returned to stop the writing process. Implement sops->shutdown and ioctl to shut down the file system when underlying block device is marked dead. Signed-off-by: Dongliang Cui Signed-off-by: Zhiguo Niu Signed-off-by: Namjae Jeon --- include/uapi/linux/exfat.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/uapi/linux/exfat.h (limited to 'include') diff --git a/include/uapi/linux/exfat.h b/include/uapi/linux/exfat.h new file mode 100644 index 000000000000..46d95b16fc4b --- /dev/null +++ b/include/uapi/linux/exfat.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2024 Unisoc Technologies Co., Ltd. + */ + +#ifndef _UAPI_LINUX_EXFAT_H +#define _UAPI_LINUX_EXFAT_H +#include +#include + +/* + * exfat-specific ioctl commands + */ + +#define EXFAT_IOC_SHUTDOWN _IOR('X', 125, __u32) + +/* + * Flags used by EXFAT_IOC_SHUTDOWN + */ + +#define EXFAT_GOING_DOWN_DEFAULT 0x0 /* default with full sync */ +#define EXFAT_GOING_DOWN_FULLSYNC 0x1 /* going down with full sync*/ +#define EXFAT_GOING_DOWN_NOSYNC 0x2 /* going down */ + +#endif /* _UAPI_LINUX_EXFAT_H */ -- cgit v1.2.3 From 9ba5dcc722de4390a1d3211b2ee3c864f84f5461 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Sep 2024 01:48:17 +0100 Subject: block: Remove unused blk_limits_io_{min,opt} blk_limits_io_min and blk_limits_io_opt are unused since the recent commit 0a94a469a4f0 ("dm: stop using blk_limits_io_{min,opt}") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20240920004817.676216-1-linux@treblig.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 643c9020a35a..50c3b959da28 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -968,8 +968,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From 65f666c6203600053478ce8e34a1db269a8701c9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 19 Sep 2024 10:17:09 +0800 Subject: lib/sbitmap: define swap_lock as raw_spinlock_t When called from sbitmap_queue_get(), sbitmap_deferred_clear() may be run with preempt disabled. In RT kernel, spin_lock() can sleep, then warning of "BUG: sleeping function called from invalid context" can be triggered. Fix it by replacing it with raw_spin_lock. Cc: Yang Yang Fixes: 72d04bdcf3f7 ("sbitmap: fix io hung due to race on sbitmap_word::cleared") Signed-off-by: Ming Lei Reviewed-by: Yang Yang Link: https://lore.kernel.org/r/20240919021709.511329-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index c09cdcc99471..189140bf11fc 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -40,7 +40,7 @@ struct sbitmap_word { /** * @swap_lock: serializes simultaneous updates of ->word and ->cleared */ - spinlock_t swap_lock; + raw_spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** -- cgit v1.2.3 From 652bfcb76fe689f4d85b6f3688025a87fb94f9a1 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 31 Jul 2024 15:43:13 +0800 Subject: KEYS: Remove unused declarations These declarations are never implemented, remove it. 
Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/keys/dns_resolver-type.h | 4 ---- include/linux/key.h | 3 --- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/keys/dns_resolver-type.h b/include/keys/dns_resolver-type.h index 218ca22fb056..1b89088a2837 100644 --- a/include/keys/dns_resolver-type.h +++ b/include/keys/dns_resolver-type.h @@ -12,8 +12,4 @@ extern struct key_type key_type_dns_resolver; -extern int request_dns_resolver_key(const char *description, - const char *callout_info, - char **data); - #endif /* _KEYS_DNS_RESOLVER_TYPE_H */ diff --git a/include/linux/key.h b/include/linux/key.h index 943a432da3ae..074dca3222b9 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -436,9 +436,6 @@ extern key_ref_t keyring_search(key_ref_t keyring, const char *description, bool recurse); -extern int keyring_add_key(struct key *keyring, - struct key *key); - extern int keyring_restrict(key_ref_t keyring, const char *type, const char *restriction); -- cgit v1.2.3 From 60749cbe3d8ae572a6c7dda675de3e8b25797a18 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Jul 2024 17:14:18 +1000 Subject: sunrpc: change sp_nrthreads from atomic_t to unsigned int. sp_nrthreads is only ever accessed under the service mutex nlmsvc_mutex nfs_callback_mutex nfsd_mutex so these is no need for it to be an atomic_t. The fact that all code using it is single-threaded means that we can simplify svc_pool_victim and remove the temporary elevation of sp_nrthreads. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e4fa25fafa97..99e9345d829e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -33,9 +33,9 @@ * node traffic on multi-node NUMA NFS servers. */ struct svc_pool { - unsigned int sp_id; /* pool id; also node id on NUMA */ + unsigned int sp_id; /* pool id; also node id on NUMA */ struct lwq sp_xprts; /* pending transports */ - atomic_t sp_nrthreads; /* # of threads in pool */ + unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct llist_head sp_idle_threads; /* idle server threads */ -- cgit v1.2.3 From 3391fc92db8e761f1a2df5612fcb999dac6bc00a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 16 Sep 2024 09:45:40 +1000 Subject: sunrpc: allow svc threads to fail initialisation cleanly If an svc thread needs to perform some initialisation that might fail, it has no good way to handle the failure. Before the thread can exit it must call svc_exit_thread(), but that requires the service mutex to be held. The thread cannot simply take the mutex as that could deadlock if there is a concurrent attempt to shut down all threads (which is unlikely, but not impossible). nfsd currently call svc_exit_thread() unprotected in the unlikely event that unshare_fs_struct() fails. We can clean this up by introducing svc_thread_init_status() by which an svc thread can report whether initialisation has succeeded. If it has, it continues normally into the action loop. If it has not, svc_thread_init_status() immediately aborts the thread. svc_start_kthread() waits for either of these to happen, and calls svc_exit_thread() (under the mutex) if the thread aborted. 
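A sketch of the resulting thread-function shape (the setup step and the loop body are placeholders, not real sunrpc code):

  static int my_svc_thread(void *data)
  {
          struct svc_rqst *rqstp = data;
          int err;

          err = do_thread_setup();        /* anything that may fail */

          /*
           * Report the result to svc_start_kthread(); if err is
           * non-zero this call does not return and the starter
           * performs the svc_exit_thread() under the service mutex.
           */
          svc_thread_init_status(rqstp, err);

          /* normal service loop, unchanged from before */
          while (!svc_thread_should_stop(rqstp))
                  svc_recv(rqstp);

          /* usual thread teardown follows here */
          return 0;
  }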
Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 99e9345d829e..c419a61f60e5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * @@ -232,6 +233,11 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + + int rq_err; /* Thread sets this to inidicate + * initialisation success. + */ + unsigned long bc_to_initval; unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ @@ -305,6 +311,31 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) return test_bit(RQ_VICTIM, &rqstp->rq_flags); } +/** + * svc_thread_init_status - report whether thread has initialised successfully + * @rqstp: the thread in question + * @err: errno code + * + * After performing any initialisation that could fail, and before starting + * normal work, each sunrpc svc_thread must call svc_thread_init_status() + * with an appropriate error, or zero. + * + * If zero is passed, the thread is ready and must continue until + * svc_thread_should_stop() returns true. If a non-zero error is passed + * the call will not return - the thread will exit. + */ +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) +{ + rqstp->rq_err = err; + /* memory barrier ensures assignment to error above is visible before + * waitqueue_active() test below completes. + */ + smp_mb(); + wake_up_var(&rqstp->rq_err); + if (err) + kthread_exit(1); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; -- cgit v1.2.3 From 36ffa3d0de54c1cf516ea32a5ec556f5c9874795 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Jul 2024 11:47:23 +1000 Subject: nfsd: be more systematic about selecting error codes for internal use. Rather than using ad hoc values for internal errors (30000, 11000, ...) use 'enum' to sequentially allocate numbers starting from the first known available number - now visible as NFS4ERR_FIRST_FREE. The goal is values that are distinct from all be32 error codes. To get those we must first select integers that are not already used, then convert them with cpu_to_be32(). 
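The allocation pattern on the nfsd side then becomes roughly the following (the internal error names here are made up for illustration):

  /*
   * Internal-only errors: allocate plain integers after the last
   * protocol-defined value, then convert so the be32 forms cannot
   * collide with anything that goes on the wire.
   */
  enum {
          NFSD_INTERNAL_DROPIT = NFS4ERR_FIRST_FREE,
          NFSD_INTERNAL_REPLAY_CACHE,
  };

  #define nfserr_dropit        cpu_to_be32(NFSD_INTERNAL_DROPIT)
  #define nfserr_replay_cache  cpu_to_be32(NFSD_INTERNAL_REPLAY_CACHE)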
Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index f9df88091c6d..8d7430d9f218 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -281,15 +281,18 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_UNION_NOTSUPP = 10090, - NFS4ERR_OFFLOAD_DENIED = 10091, - NFS4ERR_WRONG_LFS = 10092, - NFS4ERR_BADLABEL = 10093, - NFS4ERR_OFFLOAD_NO_REQS = 10094, + NFS4ERR_UNION_NOTSUPP = 10090, + NFS4ERR_OFFLOAD_DENIED = 10091, + NFS4ERR_WRONG_LFS = 10092, + NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, /* xattr (RFC8276) */ - NFS4ERR_NOXATTR = 10095, - NFS4ERR_XATTR2BIG = 10096, + NFS4ERR_NOXATTR = 10095, + NFS4ERR_XATTR2BIG = 10096, + + /* can be used for internal errors */ + NFS4ERR_FIRST_FREE }; /* error codes for internal client use */ -- cgit v1.2.3 From c4de97f7c45434985e5dbf2d6ccc9eca676e37fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Jul 2024 16:52:32 -0400 Subject: svcrdma: Handle device removal outside of the CM event handler Synchronously wait for all disconnects to complete to ensure the transports have divested all hardware resources before the underlying RDMA device can safely be removed. Reviewed-by: Sagi Grimberg Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ include/trace/events/rpcrdma.h | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d33bab33099a..619fc0bd837a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,7 @@ struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ int sc_max_send_sges; bool sc_snd_w_inv; /* OK to use Send With Invalidate */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a96a985c49b3..e6a72646c507 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2172,6 +2172,29 @@ TRACE_EVENT(svcrdma_qp_error, ) ); +TRACE_EVENT(svcrdma_device_removal, + TP_PROTO( + const struct rdma_cm_id *id + ), + + TP_ARGS(id), + + TP_STRUCT__entry( + __string(name, id->device->name) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __assign_str(name); + memcpy(__entry->addr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("device %s to be removed, disconnecting %pISpc\n", + __get_str(name), __entry->addr + ) +); + DECLARE_EVENT_CLASS(svcrdma_sendqueue_class, TP_PROTO( const struct svcxprt_rdma *rdma, -- cgit v1.2.3 From 4b132aacb0768ac1e652cf517097ea6f237214b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Sep 2024 14:08:13 -0400 Subject: tools: Add xdrgen Add a Python-based tool for translating XDR specifications into XDR encoder and decoder functions written in the Linux kernel's C coding style. The generator attempts to match the usual C coding style of the Linux kernel's SunRPC consumers. 
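For a flavour of the intended output, here is a hand-written approximation (not actual generator output) of the decoder such a tool would emit for a trivial XDR struct, built on the xdrgen_* primitives added below:

  /* XDR:  struct timestamp { unsigned int sec; unsigned int nsec; }; */
  static bool
  xdrgen_decode_timestamp(struct xdr_stream *xdr, struct timestamp *ptr)
  {
          if (!xdrgen_decode_unsigned_int(xdr, &ptr->sec))
                  return false;
          if (!xdrgen_decode_unsigned_int(xdr, &ptr->nsec))
                  return false;
          return true;
  }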
This approach is similar to the netlink code generator in tools/net/ynl . The maintainability benefits of machine-generated XDR code include: - Stronger type checking - Reduces the number of bugs introduced by human error - Makes the XDR code easier to audit and analyze - Enables rapid prototyping of new RPC-based protocols - Hardens the layering between protocol logic and marshaling - Makes it easier to add observability on demand - Unit tests might be built for both the tool and (automatically) for the generated code In addition, converting the XDR layer to use memory-safe languages such as Rust will be easier if much of the code can be converted automatically. Tested-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_builtins.h | 243 ++++++++++++++++++++++++++++++++ include/linux/sunrpc/xdrgen/_defs.h | 26 ++++ 2 files changed, 269 insertions(+) create mode 100644 include/linux/sunrpc/xdrgen/_builtins.h create mode 100644 include/linux/sunrpc/xdrgen/_defs.h (limited to 'include') diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h new file mode 100644 index 000000000000..68746c59fc9a --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__BUILTINS_H_ +#define _SUNRPC_XDRGEN__BUILTINS_H_ + +#include + +static inline bool +xdrgen_decode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_encode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = (*p != xdr_zero); + return true; +} + +static inline bool +xdrgen_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = val ? 
xdr_one : xdr_zero; + return true; +} + +static inline bool +xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_int(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_long(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (unsigned char *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +static inline bool +xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + return false; + if (unlikely(maxlen && len > maxlen)) + 
return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (u8 *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h new file mode 100644 index 000000000000..be9e62371758 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__DEFS_H_ +#define _SUNRPC_XDRGEN__DEFS_H_ + +#define TRUE (true) +#define FALSE (false) + +typedef struct { + u32 len; + unsigned char *data; +} string; + +typedef struct { + u32 len; + u8 *data; +} opaque; + +#endif /* _SUNRPC_XDRGEN__DEFS_H_ */ -- cgit v1.2.3 From 663ad8b1df8724cd5e01df66ea67ce0424fbcdf6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Sep 2024 15:31:19 -0400 Subject: xdrgen: Fix return code checking in built-in XDR decoders xdr_stream_encode_u32() returns XDR_UNIT on success. xdr_stream_decode_u32() returns zero or -EMSGSIZE, but never XDR_UNIT. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_builtins.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h index 68746c59fc9a..66ca3ece951a 100644 --- a/include/linux/sunrpc/xdrgen/_builtins.h +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -184,7 +184,7 @@ xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) __be32 *p; u32 len; - if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return false; if (unlikely(maxlen && len > maxlen)) return false; @@ -215,7 +215,7 @@ xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) __be32 *p; u32 len; - if (unlikely(xdr_stream_decode_u32(xdr, &len) != XDR_UNIT)) + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return false; if (unlikely(maxlen && len > maxlen)) return false; -- cgit v1.2.3 From bb0e391975f8da826305cbaa3e3d34b03c47e2a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Sep 2024 09:10:17 +0200 Subject: dma-mapping: fix vmap and mmap of noncontiougs allocations Commit b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") switched to use direct calls to dma-iommu, but missed the dma_vmap_noncontiguous, dma_vunmap_noncontiguous and dma_mmap_noncontiguous behavior keyed off the presence of the alloc_noncontiguous method. Fix this by removing the now unused alloc_noncontiguous and free_noncontiguous methods and moving the vmapping and mmaping of the noncontiguous allocations into the iommu code, as it is the only provider of actually noncontiguous allocations. 
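The resulting dispatch in kernel/dma/mapping.c is then roughly the following sketch (the dma-direct branch is an assumption about the non-IOMMU case, where the "noncontiguous" allocation is in fact a single contiguous region):

  void *dma_vmap_noncontiguous(struct device *dev, size_t size,
                               struct sg_table *sgt)
  {
          if (use_dma_iommu(dev))
                  return iommu_dma_vmap_noncontiguous(dev, size, sgt);

          /* dma-direct: already contiguous, no vmap needed */
          return page_address(sg_page(sgt->sgl));
  }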
Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Reported-by: Xi Ruoyao Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky Tested-by: Xi Ruoyao --- include/linux/dma-map-ops.h | 19 ------------------- include/linux/iommu-dma.h | 6 ++++++ 2 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9668ddf3696e..b7773201414c 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -24,11 +24,6 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); - struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, - enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs); - void (*free_noncontiguous)(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -206,20 +201,6 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_GLOBAL_POOL */ -/* - * This is the actual return value from the ->alloc_noncontiguous method. - * The users of the DMA API should only care about the sg_table, but to make - * the DMA-API internal vmaping and freeing easier we stash away the page - * array as well (except for the fallback case). This can go away any time, - * e.g. when a vmap-variant that takes a scatterlist comes along. - */ -struct dma_sgt_handle { - struct sg_table sgt; - struct page **pages; -}; -#define sgt_handle(sgt) \ - container_of((sgt), struct dma_sgt_handle, sgt) - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 1bb55ca1ab79..7bf145a52d6a 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -44,6 +44,12 @@ struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void iommu_dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir); +void *iommu_dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt); +#define iommu_dma_vunmap_noncontiguous(dev, vaddr) \ + vunmap(vaddr); +int iommu_dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt); void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, -- cgit v1.2.3 From 3d09ff45469eb2912ce2227c47efac297230d6d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Sep 2024 09:21:10 +0200 Subject: iommu/dma: remove most stubs in iommu-dma.h The direct calls from mapping.c all guarded by use_dma_iommu(), so don't bother to provide stubs, but instead just expose the prototypes unconditionally. 
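Each of those call sites has the shape sketched below, so with CONFIG_IOMMU_DMA disabled the constant-false use_dma_iommu() lets the compiler drop the call entirely and only the prototype is required (the dma-direct branch shown is illustrative):

  if (use_dma_iommu(dev))
          iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
  else
          dma_direct_sync_single_for_cpu(dev, addr, size, dir);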
Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky --- include/linux/iommu-dma.h | 108 ++++------------------------------------------ 1 file changed, 8 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 7bf145a52d6a..508beaa44c39 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -14,6 +14,13 @@ static inline bool use_dma_iommu(struct device *dev) { return dev->dma_iommu; } +#else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif /* CONFIG_IOMMU_DMA */ + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -58,104 +65,5 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); -#else -static inline bool use_dma_iommu(struct device *dev) -{ - return false; -} -static inline dma_addr_t iommu_dma_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - return DMA_MAPPING_ERROR; -} -static inline void iommu_dma_unmap_page(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - return -EINVAL; -} -static inline void iommu_dma_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void *iommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) -{ - return NULL; -} -static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - return -EINVAL; -} -static inline int iommu_dma_get_sgtable(struct device *dev, - struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs) -{ - return -EINVAL; -} -static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) -{ - return 0; -} -static inline size_t iommu_dma_opt_mapping_size(void) -{ - return 0; -} -static inline size_t iommu_dma_max_mapping_size(struct device *dev) -{ - return 0; -} -static inline void iommu_dma_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, unsigned long attrs) -{ -} -static inline dma_addr_t iommu_dma_map_resource(struct device *dev, - phys_addr_t phys, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return DMA_MAPPING_ERROR; -} -static inline void iommu_dma_unmap_resource(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline struct sg_table * -iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, - enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) -{ - return NULL; -} -static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_single_for_device(struct 
device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ -} -static inline void iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ -} -#endif /* CONFIG_IOMMU_DMA */ + #endif /* _LINUX_IOMMU_DMA_H */ -- cgit v1.2.3 From d0dd066a0fa26d55c19ace9e89dedd9504c5bcba Mon Sep 17 00:00:00 2001 From: "Christoph Lameter (Ampere)" Date: Wed, 12 Jun 2024 09:49:56 -0700 Subject: seqcount: replace smp_rmb() in read_seqcount() with load acquire Many architectures support load acquire which can replace a memory barrier and save some cycles. A typical sequence do { seq = read_seqcount_begin(&s); } while (read_seqcount_retry(&s, seq)); requires 13 cycles on an N1 Neoverse arm64 core (Ampere Altra, to be specific) for an empty loop. Two read memory barriers are needed. One for each of the seqcount_* functions. We can replace the first read barrier with a load acquire of the seqcount which saves us one barrier. On the Altra doing so reduces the cycle count from 13 to 8. According to ARM, this is a general improvement for the ARM64 architecture and not specific to a certain processor. See https://developer.arm.com/documentation/102336/0100/Load-Acquire-and-Store-Release-instructions "Weaker ordering requirements that are imposed by Load-Acquire and Store-Release instructions allow for micro-architectural optimizations, which could reduce some of the performance impacts that are otherwise imposed by an explicit memory barrier. If the ordering requirement is satisfied using either a Load-Acquire or Store-Release, then it would be preferable to use these instructions instead of a DMB" [ NOTE! This is my original minimal patch that unconditionally switches over to using smp_load_acquire(), instead of the much more involved and subtle patch that Christoph Lameter wrote that made it conditional. But Christoph gets authorship credit because I had initially thought that we needed the more complex model, and Christoph ran with it and did the work. Only after looking at code generation for all the relevant architectures did I come to the conclusion that nobody actually really needs the old "smp_rmb()" model. Even architectures without load-acquire support generally do as well or better with smp_load_acquire(). So credit to Christoph, but if this then causes issues on other architectures, put the blame solidly on me. Also note that as part of the ruthless simplification, this gets rid of the overly subtle optimization where some code uses a non-barrier version of the sequence count (see the __read_seqcount_begin() users in fs/namei.c). They then play games with their own barriers and/or with nested sequence counts. Those optimizations are literally meaningless on x86, and questionable elsewhere. If somebody can show that they matter, we need to re-do them more cleanly than "use an internal helper".
- Linus ] Signed-off-by: Christoph Lameter (Ampere) Link: https://lore.kernel.org/all/20240912-seq_optimize-v3-1-8ee25e04dffa@gentwo.org/ Signed-off-by: Linus Torvalds --- include/linux/seqlock.h | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d90d8ee29d81..fffeb754880f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -157,7 +157,7 @@ __seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ - unsigned seq = READ_ONCE(s->seqcount.sequence); \ + unsigned seq = smp_load_acquire(&s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ @@ -170,7 +170,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. \ */ \ - seq = READ_ONCE(s->seqcount.sequence); \ + seq = smp_load_acquire(&s->seqcount.sequence); \ } \ \ return seq; \ @@ -208,7 +208,7 @@ static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) static inline unsigned __seqprop_sequence(const seqcount_t *s) { - return READ_ONCE(s->sequence); + return smp_load_acquire(&s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) @@ -263,17 +263,9 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define seqprop_assert(s) __seqprop(s, assert)(s) /** - * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * __read_seqcount_begin() - begin a seqcount_t read section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() - * barrier. Callers should ensure that smp_rmb() or equivalent ordering is - * provided before actually loading any of the variables that are to be - * protected in this critical section. - * - * Use carefully, only in critical code, and comment how the barrier is - * provided. 
- * * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ @@ -293,13 +285,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * * Return: count to be passed to read_seqcount_retry() */ -#define raw_read_seqcount_begin(s) \ -({ \ - unsigned _seq = __read_seqcount_begin(s); \ - \ - smp_rmb(); \ - _seq; \ -}) +#define raw_read_seqcount_begin(s) __read_seqcount_begin(s) /** * read_seqcount_begin() - begin a seqcount_t read critical section @@ -328,7 +314,6 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq = seqprop_sequence(s); \ \ - smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) -- cgit v1.2.3 From ffcdc4c628e1a30489da10dd78358e89c823b341 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 6 Sep 2024 16:34:52 +0200 Subject: fs/mnt_idmapping: introduce an invalid_mnt_idmap Link: https://lore.kernel.org/linux-fsdevel/20240904-baugrube-erhoben-b3c1c49a2645@brauner/ Suggested-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- include/linux/mnt_idmapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index cd4d5c8781f5..b1b219bc3422 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -9,6 +9,7 @@ struct mnt_idmap; struct user_namespace; extern struct mnt_idmap nop_mnt_idmap; +extern struct mnt_idmap invalid_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { -- cgit v1.2.3 From c8770db2d54437a5f49417ae7b46f7de23d14db6 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 10 Sep 2024 15:08:22 -0400 Subject: tcp: check skb is non-NULL in tcp_rto_delta_us() We have some machines running stock Ubuntu 20.04.6 (their 5.4.0-174-generic kernel) that run ceph and recently hit a NULL pointer dereference in tcp_rearm_rto(). We initially hit it from the TLP path, but later also saw it hit from the RACK case.
Here are examples of the oops messages we saw in each of those cases: Jul 26 15:05:02 rx [11061395.780353] BUG: kernel NULL pointer dereference, address: 0000000000000020 Jul 26 15:05:02 rx [11061395.787572] #PF: supervisor read access in kernel mode Jul 26 15:05:02 rx [11061395.792971] #PF: error_code(0x0000) - not-present page Jul 26 15:05:02 rx [11061395.798362] PGD 0 P4D 0 Jul 26 15:05:02 rx [11061395.801164] Oops: 0000 [#1] SMP NOPTI Jul 26 15:05:02 rx [11061395.805091] CPU: 0 PID: 9180 Comm: msgr-worker-1 Tainted: G W 5.4.0-174-generic #193-Ubuntu Jul 26 15:05:02 rx [11061395.814996] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 15:05:02 rx [11061395.825952] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.830656] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061395.849665] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061395.855149] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061395.862542] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061395.869933] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061395.877318] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061395.884710] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061395.892095] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061395.900438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061395.906435] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061395.913822] PKRU: 55555554 Jul 26 15:05:02 rx [11061395.916786] Call Trace: Jul 26 15:05:02 rx [11061395.919488] Jul 26 15:05:02 rx [11061395.921765] ? show_regs.cold+0x1a/0x1f Jul 26 15:05:02 rx [11061395.925859] ? __die+0x90/0xd9 Jul 26 15:05:02 rx [11061395.929169] ? no_context+0x196/0x380 Jul 26 15:05:02 rx [11061395.933088] ? ip6_protocol_deliver_rcu+0x4e0/0x4e0 Jul 26 15:05:02 rx [11061395.938216] ? ip6_sublist_rcv_finish+0x3d/0x50 Jul 26 15:05:02 rx [11061395.943000] ? __bad_area_nosemaphore+0x50/0x1a0 Jul 26 15:05:02 rx [11061395.947873] ? bad_area_nosemaphore+0x16/0x20 Jul 26 15:05:02 rx [11061395.952486] ? do_user_addr_fault+0x267/0x450 Jul 26 15:05:02 rx [11061395.957104] ? ipv6_list_rcv+0x112/0x140 Jul 26 15:05:02 rx [11061395.961279] ? __do_page_fault+0x58/0x90 Jul 26 15:05:02 rx [11061395.965458] ? do_page_fault+0x2c/0xe0 Jul 26 15:05:02 rx [11061395.969465] ? page_fault+0x34/0x40 Jul 26 15:05:02 rx [11061395.973217] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.977313] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.981408] tcp_send_loss_probe+0x10b/0x220 Jul 26 15:05:02 rx [11061395.985937] tcp_write_timer_handler+0x1b4/0x240 Jul 26 15:05:02 rx [11061395.990809] tcp_write_timer+0x9e/0xe0 Jul 26 15:05:02 rx [11061395.994814] ? tcp_write_timer_handler+0x240/0x240 Jul 26 15:05:02 rx [11061395.999866] call_timer_fn+0x32/0x130 Jul 26 15:05:02 rx [11061396.003782] __run_timers.part.0+0x180/0x280 Jul 26 15:05:02 rx [11061396.008309] ? recalibrate_cpu_khz+0x10/0x10 Jul 26 15:05:02 rx [11061396.012841] ? 
native_x2apic_icr_write+0x30/0x30 Jul 26 15:05:02 rx [11061396.017718] ? lapic_next_event+0x21/0x30 Jul 26 15:05:02 rx [11061396.021984] ? clockevents_program_event+0x8f/0xe0 Jul 26 15:05:02 rx [11061396.027035] run_timer_softirq+0x2a/0x50 Jul 26 15:05:02 rx [11061396.031212] __do_softirq+0xd1/0x2c1 Jul 26 15:05:02 rx [11061396.035044] do_softirq_own_stack+0x2a/0x40 Jul 26 15:05:02 rx [11061396.039480] Jul 26 15:05:02 rx [11061396.041840] do_softirq.part.0+0x46/0x50 Jul 26 15:05:02 rx [11061396.046022] __local_bh_enable_ip+0x50/0x60 Jul 26 15:05:02 rx [11061396.050460] _raw_spin_unlock_bh+0x1e/0x20 Jul 26 15:05:02 rx [11061396.054817] nf_conntrack_tcp_packet+0x29e/0xbe0 [nf_conntrack] Jul 26 15:05:02 rx [11061396.060994] ? get_l4proto+0xe7/0x190 [nf_conntrack] Jul 26 15:05:02 rx [11061396.066220] nf_conntrack_in+0xe9/0x670 [nf_conntrack] Jul 26 15:05:02 rx [11061396.071618] ipv6_conntrack_local+0x14/0x20 [nf_conntrack] Jul 26 15:05:02 rx [11061396.077356] nf_hook_slow+0x45/0xb0 Jul 26 15:05:02 rx [11061396.081098] ip6_xmit+0x3f0/0x5d0 Jul 26 15:05:02 rx [11061396.084670] ? ipv6_anycast_cleanup+0x50/0x50 Jul 26 15:05:02 rx [11061396.089282] ? __sk_dst_check+0x38/0x70 Jul 26 15:05:02 rx [11061396.093381] ? inet6_csk_route_socket+0x13b/0x200 Jul 26 15:05:02 rx [11061396.098346] inet6_csk_xmit+0xa7/0xf0 Jul 26 15:05:02 rx [11061396.102263] __tcp_transmit_skb+0x550/0xb30 Jul 26 15:05:02 rx [11061396.106701] tcp_write_xmit+0x3c6/0xc20 Jul 26 15:05:02 rx [11061396.110792] ? __alloc_skb+0x98/0x1d0 Jul 26 15:05:02 rx [11061396.114708] __tcp_push_pending_frames+0x37/0x100 Jul 26 15:05:02 rx [11061396.119667] tcp_push+0xfd/0x100 Jul 26 15:05:02 rx [11061396.123150] tcp_sendmsg_locked+0xc70/0xdd0 Jul 26 15:05:02 rx [11061396.127588] tcp_sendmsg+0x2d/0x50 Jul 26 15:05:02 rx [11061396.131245] inet6_sendmsg+0x43/0x70 Jul 26 15:05:02 rx [11061396.135075] __sock_sendmsg+0x48/0x70 Jul 26 15:05:02 rx [11061396.138994] ____sys_sendmsg+0x212/0x280 Jul 26 15:05:02 rx [11061396.143172] ___sys_sendmsg+0x88/0xd0 Jul 26 15:05:02 rx [11061396.147098] ? __seccomp_filter+0x7e/0x6b0 Jul 26 15:05:02 rx [11061396.151446] ? __switch_to+0x39c/0x460 Jul 26 15:05:02 rx [11061396.155453] ? __switch_to_asm+0x42/0x80 Jul 26 15:05:02 rx [11061396.159636] ? 
__switch_to_asm+0x5a/0x80 Jul 26 15:05:02 rx [11061396.163816] __sys_sendmsg+0x5c/0xa0 Jul 26 15:05:02 rx [11061396.167647] __x64_sys_sendmsg+0x1f/0x30 Jul 26 15:05:02 rx [11061396.171832] do_syscall_64+0x57/0x190 Jul 26 15:05:02 rx [11061396.175748] entry_SYSCALL_64_after_hwframe+0x5c/0xc1 Jul 26 15:05:02 rx [11061396.181055] RIP: 0033:0x7f1ef692618d Jul 26 15:05:02 rx [11061396.184893] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 ca ee ff ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 2f 44 89 c7 48 89 44 24 08 e8 fe ee ff ff 48 Jul 26 15:05:02 rx [11061396.203889] RSP: 002b:00007f1ef4a26aa0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e Jul 26 15:05:02 rx [11061396.211708] RAX: ffffffffffffffda RBX: 000000000000084b RCX: 00007f1ef692618d Jul 26 15:05:02 rx [11061396.219091] RDX: 0000000000004000 RSI: 00007f1ef4a26b10 RDI: 0000000000000275 Jul 26 15:05:02 rx [11061396.226475] RBP: 0000000000004000 R08: 0000000000000000 R09: 0000000000000020 Jul 26 15:05:02 rx [11061396.233859] R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000084b Jul 26 15:05:02 rx [11061396.241243] R13: 00007f1ef4a26b10 R14: 0000000000000275 R15: 000055592030f1e8 Jul 26 15:05:02 rx [11061396.248628] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler nft_ct sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 mlx5_core hid_generic pci_hyperv_intf crc32_pclmul tls usbhid ahci mlxfw bnxt_en libahci hid nvme i2c_piix4 nvme_core wmi Jul 26 15:05:02 rx [11061396.324334] CR2: 0000000000000020 Jul 26 15:05:02 rx [11061396.327944] ---[ end trace 68a2b679d1cfb4f1 ]--- Jul 26 15:05:02 rx [11061396.433435] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061396.438137] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061396.457144] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061396.462629] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061396.470012] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061396.477396] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061396.484779] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061396.492164] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061396.499547] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061396.507886] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061396.513884] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061396.521267] PKRU: 55555554 Jul 26 15:05:02 rx [11061396.524230] Kernel panic - not 
syncing: Fatal exception in interrupt Jul 26 15:05:02 rx [11061396.530885] Kernel Offset: 0x1b200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Jul 26 15:05:03 rx [11061396.660181] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- After we hit this we disabled TLP by setting tcp_early_retrans to 0 and then hit the crash in the RACK case: Aug 7 07:26:16 rx [1006006.265582] BUG: kernel NULL pointer dereference, address: 0000000000000020 Aug 7 07:26:16 rx [1006006.272719] #PF: supervisor read access in kernel mode Aug 7 07:26:16 rx [1006006.278030] #PF: error_code(0x0000) - not-present page Aug 7 07:26:16 rx [1006006.283343] PGD 0 P4D 0 Aug 7 07:26:16 rx [1006006.286057] Oops: 0000 [#1] SMP NOPTI Aug 7 07:26:16 rx [1006006.289896] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.4.0-174-generic #193-Ubuntu Aug 7 07:26:16 rx [1006006.299107] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Aug 7 07:26:16 rx [1006006.309970] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.314584] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Aug 7 07:26:16 rx [1006006.333499] RSP: 0018:ffffb42600a50960 EFLAGS: 00010246 Aug 7 07:26:16 rx [1006006.338895] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Aug 7 07:26:16 rx [1006006.346193] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff92d687ed8160 Aug 7 07:26:16 rx [1006006.353489] RBP: ffffb42600a50978 R08: 0000000000000000 R09: 00000000cd896dcc Aug 7 07:26:16 rx [1006006.360786] R10: ffff92dc3404f400 R11: 0000000000000001 R12: ffff92d687ed8000 Aug 7 07:26:16 rx [1006006.368084] R13: ffff92d687ed8160 R14: 00000000cd896dcc R15: 00000000cd8fca81 Aug 7 07:26:16 rx [1006006.375381] FS: 0000000000000000(0000) GS:ffff93158ad40000(0000) knlGS:0000000000000000 Aug 7 07:26:16 rx [1006006.383632] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Aug 7 07:26:16 rx [1006006.389544] CR2: 0000000000000020 CR3: 0000003e775ce006 CR4: 0000000000760ee0 Aug 7 07:26:16 rx [1006006.396839] PKRU: 55555554 Aug 7 07:26:16 rx [1006006.399717] Call Trace: Aug 7 07:26:16 rx [1006006.402335] Aug 7 07:26:16 rx [1006006.404525] ? show_regs.cold+0x1a/0x1f Aug 7 07:26:16 rx [1006006.408532] ? __die+0x90/0xd9 Aug 7 07:26:16 rx [1006006.411760] ? no_context+0x196/0x380 Aug 7 07:26:16 rx [1006006.415599] ? __bad_area_nosemaphore+0x50/0x1a0 Aug 7 07:26:16 rx [1006006.420392] ? _raw_spin_lock+0x1e/0x30 Aug 7 07:26:16 rx [1006006.424401] ? bad_area_nosemaphore+0x16/0x20 Aug 7 07:26:16 rx [1006006.428927] ? do_user_addr_fault+0x267/0x450 Aug 7 07:26:16 rx [1006006.433450] ? __do_page_fault+0x58/0x90 Aug 7 07:26:16 rx [1006006.437542] ? do_page_fault+0x2c/0xe0 Aug 7 07:26:16 rx [1006006.441470] ? page_fault+0x34/0x40 Aug 7 07:26:16 rx [1006006.445134] ? tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.449145] tcp_ack+0xa32/0xb30 Aug 7 07:26:16 rx [1006006.452542] tcp_rcv_established+0x13c/0x670 Aug 7 07:26:16 rx [1006006.456981] ? sk_filter_trim_cap+0x48/0x220 Aug 7 07:26:16 rx [1006006.461419] tcp_v6_do_rcv+0xdb/0x450 Aug 7 07:26:16 rx [1006006.465257] tcp_v6_rcv+0xc2b/0xd10 Aug 7 07:26:16 rx [1006006.468918] ip6_protocol_deliver_rcu+0xd3/0x4e0 Aug 7 07:26:16 rx [1006006.473706] ip6_input_finish+0x15/0x20 Aug 7 07:26:16 rx [1006006.477710] ip6_input+0xa2/0xb0 Aug 7 07:26:16 rx [1006006.481109] ? 
ip6_protocol_deliver_rcu+0x4e0/0x4e0 Aug 7 07:26:16 rx [1006006.486151] ip6_sublist_rcv_finish+0x3d/0x50 Aug 7 07:26:16 rx [1006006.490679] ip6_sublist_rcv+0x1aa/0x250 Aug 7 07:26:16 rx [1006006.494779] ? ip6_rcv_finish_core.isra.0+0xa0/0xa0 Aug 7 07:26:16 rx [1006006.499828] ipv6_list_rcv+0x112/0x140 Aug 7 07:26:16 rx [1006006.503748] __netif_receive_skb_list_core+0x1a4/0x250 Aug 7 07:26:16 rx [1006006.509057] netif_receive_skb_list_internal+0x1a1/0x2b0 Aug 7 07:26:16 rx [1006006.514538] gro_normal_list.part.0+0x1e/0x40 Aug 7 07:26:16 rx [1006006.519068] napi_complete_done+0x91/0x130 Aug 7 07:26:16 rx [1006006.523352] mlx5e_napi_poll+0x18e/0x610 [mlx5_core] Aug 7 07:26:16 rx [1006006.528481] net_rx_action+0x142/0x390 Aug 7 07:26:16 rx [1006006.532398] __do_softirq+0xd1/0x2c1 Aug 7 07:26:16 rx [1006006.536142] irq_exit+0xae/0xb0 Aug 7 07:26:16 rx [1006006.539452] do_IRQ+0x5a/0xf0 Aug 7 07:26:16 rx [1006006.542590] common_interrupt+0xf/0xf Aug 7 07:26:16 rx [1006006.546421] Aug 7 07:26:16 rx [1006006.548695] RIP: 0010:native_safe_halt+0xe/0x10 Aug 7 07:26:16 rx [1006006.553399] Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Aug 7 07:26:16 rx [1006006.572309] RSP: 0018:ffffb42600177e70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffc2 Aug 7 07:26:16 rx [1006006.580040] RAX: ffffffff8ed08b20 RBX: 0000000000000005 RCX: 0000000000000001 Aug 7 07:26:16 rx [1006006.587337] RDX: 00000000f48eeca2 RSI: 0000000000000082 RDI: 0000000000000082 Aug 7 07:26:16 rx [1006006.594635] RBP: ffffb42600177e90 R08: 0000000000000000 R09: 000000000000020f Aug 7 07:26:16 rx [1006006.601931] R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000005 Aug 7 07:26:16 rx [1006006.609229] R13: ffff93157deb5f00 R14: 0000000000000000 R15: 0000000000000000 Aug 7 07:26:16 rx [1006006.616530] ? __cpuidle_text_start+0x8/0x8 Aug 7 07:26:16 rx [1006006.620886] ? 
default_idle+0x20/0x140 Aug 7 07:26:16 rx [1006006.624804] arch_cpu_idle+0x15/0x20 Aug 7 07:26:16 rx [1006006.628545] default_idle_call+0x23/0x30 Aug 7 07:26:16 rx [1006006.632640] do_idle+0x1fb/0x270 Aug 7 07:26:16 rx [1006006.636035] cpu_startup_entry+0x20/0x30 Aug 7 07:26:16 rx [1006006.640126] start_secondary+0x178/0x1d0 Aug 7 07:26:16 rx [1006006.644218] secondary_startup_64+0xa4/0xb0 Aug 7 07:26:17 rx [1006006.648568] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet ast mii drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 hid_generic mlx5_core pci_hyperv_intf crc32_pclmul usbhid ahci tls mlxfw bnxt_en hid libahci nvme i2c_piix4 nvme_core wmi [last unloaded: cpuid] Aug 7 07:26:17 rx [1006006.726180] CR2: 0000000000000020 Aug 7 07:26:17 rx [1006006.729718] ---[ end trace e0e2e37e4e612984 ]--- Prior to seeing the first crash and on other machines we also see the warning in tcp_send_loss_probe() where packets_out is non-zero, but both transmit and retrans queues are empty so we know the box is seeing some accounting issue in this area: Jul 26 09:15:27 kernel: ------------[ cut here ]------------ Jul 26 09:15:27 kernel: invalid inflight: 2 state 1 cwnd 68 mss 8988 Jul 26 09:15:27 kernel: WARNING: CPU: 16 PID: 0 at net/ipv4/tcp_output.c:2605 tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif joydev input_leds rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_he> Jul 26 09:15:27 kernel: CPU: 16 PID: 0 Comm: swapper/16 Not tainted 5.4.0-174-generic #193-Ubuntu Jul 26 09:15:27 kernel: Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 09:15:27 kernel: RIP: 0010:tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Code: 08 26 01 00 75 e2 41 0f b6 54 24 12 41 8b 8c 24 c0 06 00 00 45 89 f0 48 c7 c7 e0 b4 20 a7 c6 05 8d 08 26 01 01 e8 4a c0 0f 00 <0f> 0b eb ba 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 Jul 26 09:15:27 kernel: RSP: 0018:ffffb7838088ce00 EFLAGS: 00010286 Jul 26 09:15:27 kernel: RAX: 0000000000000000 RBX: ffff9b84b5630430 RCX: 0000000000000006 Jul 26 09:15:27 kernel: RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff9b8e4621c8c0 Jul 26 09:15:27 kernel: RBP: ffffb7838088ce18 R08: 0000000000000927 R09: 0000000000000004 Jul 26 09:15:27 kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff9b84b5630000 Jul 26 09:15:27 kernel: R13: 0000000000000000 R14: 000000000000231c R15: ffff9b84b5630430 Jul 26 09:15:27 kernel: FS: 0000000000000000(0000) GS:ffff9b8e46200000(0000) knlGS:0000000000000000 Jul 26 09:15:27 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 09:15:27 kernel: CR2: 000056238cec2380 CR3: 0000003e49ede005 CR4: 
0000000000760ee0 Jul 26 09:15:27 kernel: PKRU: 55555554 Jul 26 09:15:27 kernel: Call Trace: Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: ? show_regs.cold+0x1a/0x1f Jul 26 09:15:27 kernel: ? __warn+0x98/0xe0 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? report_bug+0xd1/0x100 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? recalibrate_cpu_khz+0x10/0x10 Jul 26 09:15:27 kernel: ? ktime_get+0x3e/0xa0 Jul 26 09:15:27 kernel: ? native_x2apic_icr_write+0x30/0x30 Jul 26 09:15:27 kernel: run_timer_softirq+0x2a/0x50 Jul 26 09:15:27 kernel: __do_softirq+0xd1/0x2c1 Jul 26 09:15:27 kernel: irq_exit+0xae/0xb0 Jul 26 09:15:27 kernel: smp_apic_timer_interrupt+0x7b/0x140 Jul 26 09:15:27 kernel: apic_timer_interrupt+0xf/0x20 Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: RIP: 0010:native_safe_halt+0xe/0x10 Jul 26 09:15:27 kernel: Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Jul 26 09:15:27 kernel: RSP: 0018:ffffb783801cfe70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 Jul 26 09:15:27 kernel: RAX: ffffffffa6908b20 RBX: 0000000000000010 RCX: 0000000000000001 Jul 26 09:15:27 kernel: RDX: 000000006fc0c97e RSI: 0000000000000082 RDI: 0000000000000082 Jul 26 09:15:27 kernel: RBP: ffffb783801cfe90 R08: 0000000000000000 R09: 0000000000000225 Jul 26 09:15:27 kernel: R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000010 Jul 26 09:15:27 kernel: R13: ffff9b8e390b0000 R14: 0000000000000000 R15: 0000000000000000 Jul 26 09:15:27 kernel: ? __cpuidle_text_start+0x8/0x8 Jul 26 09:15:27 kernel: ? default_idle+0x20/0x140 Jul 26 09:15:27 kernel: arch_cpu_idle+0x15/0x20 Jul 26 09:15:27 kernel: default_idle_call+0x23/0x30 Jul 26 09:15:27 kernel: do_idle+0x1fb/0x270 Jul 26 09:15:27 kernel: cpu_startup_entry+0x20/0x30 Jul 26 09:15:27 kernel: start_secondary+0x178/0x1d0 Jul 26 09:15:27 kernel: secondary_startup_64+0xa4/0xb0 Jul 26 09:15:27 kernel: ---[ end trace e7ac822987e33be1 ]--- The NULL ptr deref is coming from tcp_rto_delta_us() attempting to pull an skb off the head of the retransmit queue and then dereferencing that skb to get the skb_mstamp_ns value via tcp_skb_timestamp_us(skb). 
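For reference, a minimal sketch of the helper as it stood before this change, paraphrased from the hunk below and annotated with where the dereference happens (illustrative only):

  static inline s64 tcp_rto_delta_us(const struct sock *sk)
  {
          const struct sk_buff *skb = tcp_rtx_queue_head(sk);  /* may be NULL if accounting is off */
          u32 rto = inet_csk(sk)->icsk_rto;
          /* tcp_skb_timestamp_us(skb) reads skb->skb_mstamp_ns: NULL deref when skb is NULL */
          u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);

          return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
  }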
The crash is the same one that was reported a number of years ago here: https://lore.kernel.org/netdev/86c0f836-9a7c-438b-d81a-839be45f1f58@gmail.com/T/#t and the kernel we're running has the fix which was added to resolve this issue. Unfortunately we've been unsuccessful so far in reproducing this problem in the lab and do not have the luxury of pushing out a new kernel to try and test if newer kernels resolve this issue at the moment. I realize this is a report against both an Ubuntu kernel and also an older 5.4 kernel. I have reported this issue to Ubuntu here: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2077657 However, since this issue has possibly cropped up again, I feel it makes sense to build in some protection in this path (even on the latest kernel versions), since the code in question just blindly assumes there's a valid skb without testing whether it's NULL before it looks at the timestamp. Given that we have seen crashes in this path before, and now this case, it seems we should protect ourselves against incorrect packets_out accounting. While we should fix that root cause, we should also just make sure the skb is not NULL before dereferencing it. Also add a WARN_ONCE() here to capture some information if/when the problem case is hit again. Fixes: e1a10ef7fa87 ("tcp: introduce tcp_rto_delta_us() helper for xmit timer fix") Signed-off-by: Josh Hunt Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f77f812bfbe7..d1948d357dad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2435,9 +2435,26 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; - u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); - return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + if (likely(skb)) { + u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + } else { + WARN_ONCE(1, + "rtx queue emtpy: " + "out:%u sacked:%u lost:%u retrans:%u " + "tlp_high_seq:%u sk_state:%u ca_state:%u " + "advmss:%u mss_cache:%u pmtu:%u\n", + tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, + tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, + tcp_sk(sk)->tlp_high_seq, sk->sk_state, + inet_csk(sk)->icsk_ca_state, + tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, + inet_csk(sk)->icsk_pmtu_cookie); + return jiffies_to_usecs(rto); + } + } /* -- cgit v1.2.3 From f8eb5bd9a818cc5f2a1e50b22b0091830b28cc36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Sep 2024 08:58:31 -0700 Subject: mm: fix build on 32-bit targets without MAX_PHYSMEM_BITS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge resolution to deal with the conflict between commits ea72ce5da228 ("x86/kaslr: Expose and use the end of the physical memory address space") and 99185c10d5d9 ("resource, kunit: add test case for region_intersects()") ended up being broken in configurations that didn't define a MAX_PHYSMEM_BITS and that had a 32-bit 'phys_addr_t'.
The fallback to using all bits set (i.e. "(-1ULL)") ended up causing a build error: kernel/resource.c: In function ‘gfr_start’: include/linux/minmax.h:93:30: error: conversion from ‘long long unsigned int’ to ‘resource_size_t’ {aka ‘unsigned int’} changes value from ‘18446744073709551615’ to ‘4294967295’ [-Werror=overflow] This was reported by Geert for m68k, but he points out that it happens on other 32-bit architectures too, e.g. mips, xtensa, parisc, and powerpc. Limiting 'PHYSMEM_END' to a 'phys_addr_t' (which is the same as 'resource_size_t') fixes the build, but Geert points out that it will then cause a silent overflow in mm/sparse.c: unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; so we actually do want PHYSMEM_END to be defined as a 64-bit type - just not all ones, and not larger than 'phys_addr_t'. The proper fix is probably to not have some kind of default fallback at all, but just make sure every architecture has a valid MAX_PHYSMEM_BITS. But in the meantime, this just applies the rule that PHYSMEM_END is the largest value that fits in a 'phys_addr_t', but does not have the high bit set in 64 bits. Ugly, ugly. Reported-by: Geert Uytterhoeven Cc: Andrew Morton Cc: Huang Ying Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b62437447077..ecf63d2b0582 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -101,7 +101,7 @@ extern int mmap_rnd_compat_bits __read_mostly; # ifdef MAX_PHYSMEM_BITS # define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) # else -# define PHYSMEM_END (-1ULL) +# define PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) # endif #endif -- cgit v1.2.3 From d98f72272500f505cd7e152ffa456e64ee3855f0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Sep 2024 12:32:03 +1000 Subject: nfs: simplify and guarantee owner uniqueness. I have evidence of a Linux NFS client getting NFS4ERR_BAD_SEQID to a v4.0 LOCK request to a Linux server (which had the RELEASE_LOCKOWNER bug fixed). The LOCK request presented a "new" lock owner so there are two seq ids in the request: that for the open file, and that for the new lock. Given the context I am confident that the new lock owner was reported to have the wrong seqid. As lock owner identifiers are reused, the server must still have a lock owner active which the client thinks is no longer active. I wasn't able to determine a root cause, but the simplest fix seems to be to ensure lock owners are always unique, much as open owners are (thanks to a time stamp). The easiest way to ensure uniqueness is with a 64-bit counter for each server. That will never cycle: even updated once a nanosecond, it would take 584 years to wrap, a single NFS server would not handle open/lock requests nearly that fast, and a Linux node is unlikely to have an uptime approaching that. This patch removes the two IDAs and instead uses a per-server atomic64_t to provide uniqueness. Note that the lock owner already encodes the id as 64 bits even though it is a 32-bit value, so changing to a 64-bit value does not change the encoding of the lock owner. The open owner encoding is now 4 bytes larger.
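A minimal sketch of the allocation side this implies; the fs/nfs state-management change itself is not part of the include/ diff below, so the helper name here is illustrative:

  static u64 nfs4_get_unique_owner_id(struct nfs_server *server)
  {
          /* 64-bit counter: effectively never wraps, so ids are never reused
           * and the separate openowner/lockowner IDAs are no longer needed */
          return atomic64_inc_return(&server->owner_ctr);
  }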
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 3 +-- include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1df86ab98c77..e1e47ebd83ef 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -234,8 +234,7 @@ struct nfs_server { /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; #endif - struct ida openowner_id; - struct ida lockowner_id; + atomic64_t owner_ctr; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 45623af3e7b8..96ba04ab24f3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -446,7 +446,7 @@ struct nfs42_clone_res { struct stateowner_id { __u64 create_time; - __u32 uniquifier; + __u64 uniquifier; }; struct nfs4_open_delegation { -- cgit v1.2.3 From 0b108e83795c9c23101f584ef7e3ab4f1f120ef0 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 19 Aug 2024 08:58:59 -0700 Subject: SUNRPC: convert RPC_TASK_* constants to enum The RPC_TASK_* constants are defined as macros, which means that most kernel builds will not contain their definitions in the debuginfo. However, it's quite useful for debuggers to be able to view the task state constant and interpret it correctly. Conversion to an enum will ensure the constants are present in debuginfo and can be interpreted by debuggers without needing to hard-code them and track their changes. Signed-off-by: Stephen Brennan Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 0c77ba488bba..fec1e8a1570c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -151,13 +151,15 @@ struct rpc_task_setup { #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) -#define RPC_TASK_RUNNING 0 -#define RPC_TASK_QUEUED 1 -#define RPC_TASK_ACTIVE 2 -#define RPC_TASK_NEED_XMIT 3 -#define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_PIN_WAIT 5 -#define RPC_TASK_SIGNALLED 6 +enum { + RPC_TASK_RUNNING, + RPC_TASK_QUEUED, + RPC_TASK_ACTIVE, + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, + RPC_TASK_SIGNALLED, +}; #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -- cgit v1.2.3 From dfb07e990a0d019d7ae9b78dd4260620ce32e79a Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 24 Jul 2024 14:07:12 +0300 Subject: nfs: add 'noalignwrite' option for lock-less 'lost writes' prevention There are some applications that write to predefined non-overlapping file offsets from multiple clients and therefore don't need to rely on file locking. However, if these applications want non-aligned offsets and sizes, they need to either use locks or risk data corruption, as the NFS client defaults to extending writes to whole pages. This commit adds a new mount option `noalignwrite`, which allows turning that off and avoids the need for locking, as long as these applications don't overlap on offsets.
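As a rough illustration of the client-side check this option implies, only the NFS_MOUNT_NO_ALIGNWRITE bit below comes from this patch; the helper and its use are assumptions, not the actual fs/nfs write-path code:

  /* hypothetical helper: skip the page-extension behaviour when the
   * 'noalignwrite' mount option set NFS_MOUNT_NO_ALIGNWRITE */
  static bool nfs_may_extend_write(const struct nfs_server *server)
  {
          return !(server->flags & NFS_MOUNT_NO_ALIGNWRITE);
  }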
Signed-off-by: Dan Aloni Reviewed-by: Jeff Layton Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1e47ebd83ef..c49bfdded5c1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -158,6 +158,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_WAIT 0x02000000 #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 +#define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 4806ded4c14c5e8fdc6ce885d83221a78c06a428 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:35 -0400 Subject: nfs_common: factor out nfs_errtbl and nfs_stat_to_errno Common nfs_stat_to_errno() is used by both fs/nfs/nfs2xdr.c and fs/nfs/nfs3xdr.c. It will also be used by fs/nfsd/localio.c. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_common.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/nfs_common.h (limited to 'include') diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h new file mode 100644 index 000000000000..3395c4a4d372 --- /dev/null +++ b/include/linux/nfs_common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains constants and methods used by both NFS client and server. + */ +#ifndef _LINUX_NFS_COMMON_H +#define _LINUX_NFS_COMMON_H + +#include +#include + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +int nfs_stat_to_errno(enum nfs_stat status); + +#endif /* _LINUX_NFS_COMMON_H */ -- cgit v1.2.3 From 1fcb16674e37e434efe68ec3e142229f35b6b9e1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:36 -0400 Subject: nfs_common: factor out nfs4_errtbl and nfs4_stat_to_errno Common nfs4_stat_to_errno() is used by fs/nfs/nfs4xdr.c and will also be used by fs/nfs/localio.c. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfs_common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h index 3395c4a4d372..5fc02df88252 100644 --- a/include/linux/nfs_common.h +++ b/include/linux/nfs_common.h @@ -12,5 +12,6 @@ #define errno_NFSERR_IO EIO int nfs_stat_to_errno(enum nfs_stat status); +int nfs4_stat_to_errno(int stat); #endif /* _LINUX_NFS_COMMON_H */ -- cgit v1.2.3 From 1545e488b1f908b10f6dff0c278c6b7a37122de8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:37 -0400 Subject: nfs: factor out {encode,decode}_opaque_fixed to nfs_xdr.h This eliminates duplicate functions in various files and allows for additional callers.
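As an illustration of the intended callers: a stateid is a typical fixed-size opaque object, so a user of the helper shown in the hunk that follows might look like the sketch below (the wrapper itself is illustrative, not part of this patch):

  /* illustrative wrapper: encode a stateid using the shared helper */
  static void encode_nfs4_stateid(struct xdr_stream *xdr,
                                  const nfs4_stateid *stateid)
  {
          encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
  }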
Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 96ba04ab24f3..12d8e47bc5a3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1853,6 +1853,24 @@ struct nfs_rpc_ops { void (*disable_swap)(struct inode *inode); }; +/* + * Helper functions used by NFS client and/or server + */ +static inline void encode_opaque_fixed(struct xdr_stream *xdr, + const void *buf, size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); +} + +static inline int decode_opaque_fixed(struct xdr_stream *xdr, + void *buf, size_t len) +{ + ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); + if (unlikely(ret < 0)) + return -EIO; + return 0; +} + /* * Function vectors etc. for the NFS client */ @@ -1866,4 +1884,4 @@ extern const struct rpc_version nfs_version4; extern const struct rpc_version nfsacl_version3; extern const struct rpc_program nfsacl_program; -#endif +#endif /* _LINUX_NFS_XDR_H */ -- cgit v1.2.3 From 199f2128741077087a2ab33889a6868830465033 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:46 -0400 Subject: SUNRPC: add svcauth_map_clnt_to_svc_cred_local Add a new function, svcauth_map_clnt_to_svc_cred_local(), which maps a generic cred to a svc_cred suitable for use in nfsd. This is needed by the localio code to map nfs client creds to nfs server credentials. Following from net/sunrpc/auth_unix.c:unx_marshal() it is clear that ->fsuid and ->fsgid must be used (rather than ->uid and ->gid). In addition, the uid and gid must be translated with from_kuid_munged() so the local client uses the correct uid and gid when acting as the local server. Jeff Layton noted: This is where the magic happens. Since we're working in kuid_t/kgid_t, we don't need to worry about further idmapping. Suggested-by: NeilBrown # to approximate unx_marshal() Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Reviewed-by: Chuck Lever Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svcauth.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 63cf6fb26dcc..2e111153f7cd 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,10 @@ extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); +extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt, + const struct cred *, + struct svc_cred *); + extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); -- cgit v1.2.3 From 86ab08beb3f07f6e51922a8b8f662a5ec7012d35 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 5 Sep 2024 15:09:47 -0400 Subject: SUNRPC: replace program list with program array A service created with svc_create_pooled() can be given a linked list of programs and all of these will be served.
Using a linked list makes it cumbersome when there are several programs that can be optionally selected with CONFIG settings. After this patch is applied, API consumers must use only svc_create_pooled() when creating an RPC service that listens for more than one RPC program. Signed-off-by: NeilBrown Signed-off-by: Mike Snitzer Acked-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c419a61f60e5..e68fecf6eab5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -67,9 +67,10 @@ enum { * We currently do not support more than one RPC program per daemon. */ struct svc_serv { - struct svc_program * sv_program; /* RPC program */ + struct svc_program * sv_programs; /* RPC programs */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -360,10 +361,9 @@ struct svc_process_info { }; /* - * List of RPC programs on the same transport endpoint + * RPC program - an array of these can use the same transport endpoint */ struct svc_program { - struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ @@ -441,6 +441,7 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *prog, + unsigned int nprog, struct svc_stat *stats, unsigned int bufsize, int (*threadfn)(void *data)); -- cgit v1.2.3 From 2a33a85be45178198245e1f656e6224c899895e4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:48 -0400 Subject: nfs_common: add NFS LOCALIO auxiliary protocol enablement fs/nfs_common/nfslocalio.c provides interfaces that enable an NFS client to generate a nonce (single-use UUID) and associated nfs_uuid_t struct, register it with nfs_common for subsequent lookup and verification by the NFS server and if matched the NFS server populates members in the nfs_uuid_t struct. nfs_common's nfs_uuids list is the basis for localio enablement, as such it has members that point to nfsd memory for direct use by the client (e.g. 'net' is the server's network namespace, through it the client can access nn->nfsd_serv). This commit also provides the base nfs_uuid_t interfaces to allow proper net namespace refcounting for the LOCALIO use case. CONFIG_NFS_LOCALIO controls the nfs_common, NFS server and NFS client enablement for LOCALIO. If both NFS_FS=m and NFSD=m then NFS_COMMON_LOCALIO_SUPPORT=m and nfs_localio.ko is built (and provides nfs_common's LOCALIO support). 
# lsmod | grep nfs_localio nfs_localio 12288 2 nfsd,nfs sunrpc 745472 35 nfs_localio,nfsd,auth_rpcgss,lockd,nfsv3,nfs Signed-off-by: Mike Snitzer Co-developed-by: NeilBrown Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/nfslocalio.h (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h new file mode 100644 index 000000000000..4165ff8390c1 --- /dev/null +++ b/include/linux/nfslocalio.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Mike Snitzer + * Copyright (C) 2024 NeilBrown + */ +#ifndef __LINUX_NFSLOCALIO_H +#define __LINUX_NFSLOCALIO_H + +#include +#include +#include +#include +#include +#include + +/* + * Useful to allow a client to negotiate if localio + * possible with its server. + * + * See Documentation/filesystems/nfs/localio.rst for more detail. + */ +typedef struct { + uuid_t uuid; + struct list_head list; + struct net *net; /* nfsd's network namespace */ + struct auth_domain *dom; /* auth_domain for localio */ +} nfs_uuid_t; + +void nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_end(nfs_uuid_t *); +void nfs_uuid_is_local(const uuid_t *, struct list_head *, + struct net *, struct auth_domain *, struct module *); +void nfs_uuid_invalidate_clients(struct list_head *list); +void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +#endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From a61e147e6be6e763d9c6dec8061d2893c0bb3423 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:49 -0400 Subject: nfs_common: prepare for the NFS client to use nfsd_file for LOCALIO The next commit will introduce nfsd_open_local_fh() which returns an nfsd_file structure. This commit exposes LOCALIO's required NFSD symbols to the NFS client: - Make nfsd_open_local_fh() symbol and other required NFSD symbols available to NFS in a global 'nfs_to' nfsd_localio_operations struct (global access suggested by Trond, nfsd_localio_operations suggested by NeilBrown). The next commit will also introduce nfsd_localio_ops_init() that init_nfsd() will call to initialize 'nfs_to'. - Introduce nfsd_file_file() that provides access to nfsd_file's backing file. Keeps nfsd_file structure opaque to NFS client (as suggested by Jeff Layton). - Introduce nfsd_file_put_local() that will put the reference to the nfsd_file's associated nn->nfsd_serv and then put the reference to the nfsd_file (as suggested by NeilBrown). 
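Taken together, the calling pattern the client is expected to follow looks roughly like the sketch below. The operation names come from the nfsd_localio_operations table added in the hunk that follows; the wrapper, its error convention and the surrounding context are assumptions, not the actual client code:

  /* hypothetical caller: try localio, fall back to NFS over the wire on failure */
  static struct file *nfs_localio_try_open(struct net *net, struct auth_domain *dom,
                                           struct rpc_clnt *clnt, const struct cred *cred,
                                           const struct nfs_fh *fh, fmode_t mode,
                                           struct nfsd_file **nfp)
  {
          struct nfsd_file *localio;

          localio = nfs_to->nfsd_open_local_fh(net, dom, clnt, cred, fh, mode);
          if (IS_ERR_OR_NULL(localio))
                  return NULL;                      /* caller falls back to regular NFS */
          *nfp = localio;                           /* kept for nfsd_file_put_local() later */
          return nfs_to->nfsd_file_file(localio);   /* backing struct file for local I/O */
  }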
Suggested-by: Trond Myklebust # nfs_to Suggested-by: NeilBrown # nfsd_localio_operations Suggested-by: Jeff Layton # nfsd_file_file Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 4165ff8390c1..1a39d7d727bd 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ typedef struct { uuid_t uuid; struct list_head list; - struct net *net; /* nfsd's network namespace */ + struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ } nfs_uuid_t; @@ -33,4 +34,28 @@ void nfs_uuid_is_local(const uuid_t *, struct list_head *, void nfs_uuid_invalidate_clients(struct list_head *list); void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); +struct nfsd_file; + +/* localio needs to map filehandle -> struct nfsd_file */ +extern struct nfsd_file * +nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, + const struct cred *, const struct nfs_fh *, + const fmode_t) __must_hold(rcu); + +struct nfsd_localio_operations { + bool (*nfsd_serv_try_get)(struct net *); + void (*nfsd_serv_put)(struct net *); + struct nfsd_file *(*nfsd_open_local_fh)(struct net *, + struct auth_domain *, + struct rpc_clnt *, + const struct cred *, + const struct nfs_fh *, + const fmode_t); + void (*nfsd_file_put_local)(struct nfsd_file *); + struct file *(*nfsd_file_file)(struct nfsd_file *); +} ____cacheline_aligned; + +extern void nfsd_localio_ops_init(void); +extern const struct nfsd_localio_operations *nfs_to; + #endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From fa4983862e506d395acc1b8d14dbebf63acc2e82 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:50 -0400 Subject: nfsd: add LOCALIO support Add server support for bypassing NFS for localhost reads, writes, and commits. This is only useful when both the client and server are running on the same host. If nfsd_open_local_fh() fails, the NFS client will retry, and will fall back to normal network-based read, write and commit operations if localio is no longer supported. Care is taken to ensure the same NFS security mechanisms are used (authentication, etc) regardless of whether localio or regular NFS access is used. The auth_domain established as part of the traditional NFS client access to the NFS server is also used for localio. Store the auth_domain for localio in nfs_uuid_t and transfer it to the client if it is local to the server. Relative to containers, localio gives the client access to the network namespace the server has. This is required to allow the client to access the server's per-namespace nfsd_net struct. This commit also introduces the use of NFSD's percpu_ref to interlock nfsd_destroy_serv() and nfsd_open_local_fh(), to ensure nn->nfsd_serv is not destroyed while in use by nfsd_open_local_fh() and other LOCALIO client code. CONFIG_NFS_LOCALIO enables NFS server support for LOCALIO.
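The interlock can be pictured with the guard below; this is only a sketch built from the nfsd_serv_try_get/nfsd_serv_put operations in the table above, not the actual nfsd code:

  /* sketch of the nn->nfsd_serv interlock described above (illustrative only) */
  if (!nfs_to->nfsd_serv_try_get(net))
          return NULL;                  /* nfsd is gone or shutting down: fall back to NFS */
  /* ... nn->nfsd_serv cannot be destroyed here; resolve the filehandle ... */
  nfs_to->nfsd_serv_put(net);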
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Co-developed-by: NeilBrown Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 1a39d7d727bd..546f46b4e46f 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,6 +6,8 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + #include #include #include @@ -58,4 +60,10 @@ struct nfsd_localio_operations { extern void nfsd_localio_ops_init(void); extern const struct nfsd_localio_operations *nfs_to; +#else /* CONFIG_NFS_LOCALIO */ +static inline void nfsd_localio_ops_init(void) +{ +} +#endif /* CONFIG_NFS_LOCALIO */ + #endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From 946af9b3a0e7571c01447107d5e8968401e659ba Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:51 -0400 Subject: nfsd: implement server support for NFS_LOCALIO_PROGRAM The LOCALIO auxiliary RPC protocol consists of a single "UUID_IS_LOCAL" RPC method that allows the Linux NFS client to verify that the local Linux NFS server can see the nonce (single-use UUID) the client generated and made available in nfs_common. The server expects this protocol to use the same transport as NFS and NFSACL for its RPCs. This protocol isn't part of an IETF standard, nor does it need to be, considering it is a Linux-to-Linux auxiliary RPC protocol that amounts to an implementation detail. The UUID_IS_LOCAL method encodes the client-generated uuid_t in terms of the fixed UUID_SIZE (16 bytes). The fixed-size opaque encode and decode XDR methods are used instead of the less efficient variable-sized methods. The RPC program number for the NFS_LOCALIO_PROGRAM is 400122 (as assigned by IANA, see https://www.iana.org/assignments/rpc-program-numbers/ ): Linux Kernel Organization 400122 nfslocalio Signed-off-by: Mike Snitzer [neilb: factored out and simplified single localio protocol] Co-developed-by: NeilBrown Signed-off-by: NeilBrown Acked-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index ceb70a926b95..73da75908d95 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -13,6 +13,13 @@ #include #include +/* The LOCALIO program is entirely private to Linux and is + * NOT part of the uapi. + */ +#define NFS_LOCALIO_PROGRAM 400122 +#define LOCALIOPROC_NULL 0 +#define LOCALIOPROC_UUID_IS_LOCAL 1 + /* * This is the kernel NFS client file handle representation */ -- cgit v1.2.3 From df24c483e28f7f9a421afde15d0497e61bc2d3ea Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 Sep 2024 15:09:52 -0400 Subject: nfs: pass struct nfsd_file to nfs_init_pgio and nfs_init_commit The nfsd_file will be passed, in future commits, by callers that enable LOCALIO support (for both regular NFS and pNFS IO).
[Derived from patch authored by Weston Andros Adamson, but switched from passing struct file to struct nfsd_file] Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 546f46b4e46f..2c4e0fd9da6b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,6 +6,9 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H +/* nfsd_file structure is purposely kept opaque to NFS client */ +struct nfsd_file; + #if IS_ENABLED(CONFIG_NFS_LOCALIO) #include @@ -36,8 +39,6 @@ void nfs_uuid_is_local(const uuid_t *, struct list_head *, void nfs_uuid_invalidate_clients(struct list_head *list); void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); -struct nfsd_file; - /* localio needs to map filehandle -> struct nfsd_file */ extern struct nfsd_file * nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, -- cgit v1.2.3 From 70ba381e1a431245c137ed597ec6a05991c79bd9 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 5 Sep 2024 15:09:53 -0400 Subject: nfs: add LOCALIO support Add client support for bypassing NFS for localhost reads, writes, and commits. This is only useful when the client and the server are running on the same host. nfs_local_probe() is stubbed out, later commits will enable client and server handshake via a Linux-only LOCALIO auxiliary RPC protocol. This has dynamic binding with the nfsd module (via nfs_localio module which is part of nfs_common). LOCALIO will only work if nfsd is already loaded. The "localio_enabled" nfs kernel module parameter can be used to disable and enable the ability to use LOCALIO support. CONFIG_NFS_LOCALIO enables NFS client support for LOCALIO. Lastly, LOCALIO uses an nfsd_file to initiate all IO. To make proper use of nfsd_file (and nfsd's filecache) its lifetime (duration before nfsd_file_put is called) must extend until after commit, read and write operations. So rather than immediately drop the nfsd_file reference in nfs_local_open_fh(), that doesn't happen until nfs_local_pgio_release() for read/write and not until nfs_local_release_commit_data() for commit. The same applies to the reference held on nfsd's nn->nfsd_serv. Both objects' lifetimes and associated references are managed through calls to nfs_to->nfsd_file_put_local(). 
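To restate the lifetime rule above as code, a minimal hedged sketch (the context struct and helper names are assumptions; the real logic lives in the NFS client LOCALIO code): the nfsd_file obtained at open time travels with the I/O and is only put back, together with the implied nn->nfsd_serv reference, once the read/write or commit has completed:

    /* stashed by the submission path when LOCALIO is used */
    struct localio_io_ctx {
        struct nfsd_file *localio;
    };

    static void localio_io_release_sketch(struct localio_io_ctx *ctx)
    {
        if (!ctx->localio)
            return;

        /* one put releases the nfsd_file and the nfsd_serv reference */
        nfs_to->nfsd_file_put_local(ctx->localio);
        ctx->localio = NULL;
    }
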
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Co-developed-by: Mike Snitzer Signed-off-by: Mike Snitzer Signed-off-by: NeilBrown # nfs_open_local_fh Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs.h | 2 ++ include/linux/nfs_fs_sb.h | 9 +++++++++ include/linux/nfslocalio.h | 4 ++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 73da75908d95..9ad727ddfedb 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -8,6 +8,8 @@ #ifndef _LINUX_NFS_H #define _LINUX_NFS_H +#include +#include #include #include #include diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c49bfdded5c1..853df3fcd4c2 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ +#define NFS_CS_LOCAL_IO 10 /* - client is local */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -125,6 +127,13 @@ struct nfs_client { struct net *cl_net; struct list_head pending_cb_stateids; struct rcu_head rcu; + +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + struct timespec64 cl_nfssvc_boot; + seqlock_t cl_boot_lock; + nfs_uuid_t cl_uuid; + spinlock_t cl_localio_lock; +#endif /* CONFIG_NFS_LOCALIO */ }; /* diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 2c4e0fd9da6b..b353abe00357 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -61,6 +61,10 @@ struct nfsd_localio_operations { extern void nfsd_localio_ops_init(void); extern const struct nfsd_localio_operations *nfs_to; +struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, + struct rpc_clnt *, const struct cred *, + const struct nfs_fh *, const fmode_t); + #else /* CONFIG_NFS_LOCALIO */ static inline void nfsd_localio_ops_init(void) { -- cgit v1.2.3 From 88801d043b1d16caae76a5e2e5991e8b1f55ce7f Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:35 +0800 Subject: xen/pci: Add a function to reset device for xen When device on dom0 side has been reset, the vpci on Xen side won't get notification, so that the cached state in vpci is all out of date with the real device state. To solve that problem, add a new function to clear all vpci device state when device is reset on dom0 side. And call that function in pcistub_init_device. Because when using "pci-assignable-add" to assign a passthrough device in Xen, it will reset passthrough device and the vpci state will out of date, and then device will fail to restore bar state. 
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-2-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/physdev.h | 17 +++++++++++++++++ include/xen/pci.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a237af867873..df74e65a884b 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -256,6 +256,13 @@ struct physdev_pci_device_add { */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; @@ -263,6 +270,16 @@ struct physdev_pci_device { uint8_t devfn; }; +struct pci_device_reset { + struct physdev_pci_device dev; +#define PCI_DEVICE_RESET_COLD 0x0 +#define PCI_DEVICE_RESET_WARM 0x1 +#define PCI_DEVICE_RESET_HOT 0x2 +#define PCI_DEVICE_RESET_FLR 0x3 +#define PCI_DEVICE_RESET_MASK 0x3 + uint32_t flags; +}; + #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 diff --git a/include/xen/pci.h b/include/xen/pci.h index b8337cf85fd1..424b8ea89ca8 100644 --- a/include/xen/pci.h +++ b/include/xen/pci.h @@ -4,10 +4,16 @@ #define __XEN_PCI_H__ #if defined(CONFIG_XEN_DOM0) +int xen_reset_device(const struct pci_dev *dev); int xen_find_device_domain_owner(struct pci_dev *dev); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int xen_unregister_device_domain_owner(struct pci_dev *dev); #else +static inline int xen_reset_device(const struct pci_dev *dev) +{ + return -1; +} + static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; -- cgit v1.2.3 From b166b8ab4189743a717cb93f50d6fcca3a46770d Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:36 +0800 Subject: xen/pvh: Setup gsi for passthrough device In PVH dom0, the gsis don't get registered, but the gsi of a passthrough device must be configured for it to be able to be mapped into a domU. When assigning a device to passthrough, proactively setup the gsi of the device during that process. 
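The change described here reduces to the two helpers declared in the diff below; a hedged sketch of the intended call sequence at device-assignment time (the actual call site in xen-pciback may differ and handles more corner cases):

    static int pvh_setup_passthrough_gsi(struct pci_dev *dev)
    {
        int gsi, trigger, polarity, ret;

        ret = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity);
        if (ret)
            return ret;

        /* register the gsi with Xen so it can later be mapped into a domU */
        return xen_pvh_setup_gsi(gsi, trigger, polarity);
    }
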
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-3-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/linux/acpi.h | 1 + include/xen/acpi.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..02ded9f53a6b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -362,6 +362,7 @@ void acpi_unregister_gsi (u32 gsi); struct pci_dev; +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); diff --git a/include/xen/acpi.h b/include/xen/acpi.h index b1e11863144d..3bcfe82d9078 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -67,10 +67,28 @@ static inline void xen_acpi_sleep_register(void) acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; } } +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity); +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out); #else static inline void xen_acpi_sleep_register(void) { } + +static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + return -1; +} + +static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + return -1; +} #endif #endif /* _XEN_ACPI_H */ -- cgit v1.2.3 From 2fae6bb7be320270801b3c3b040189bd7daa8056 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:37 +0800 Subject: xen/privcmd: Add new syscall to get gsi from dev On PVH dom0, when passthrough a device to domU, QEMU and xl tools want to use gsi number to do pirq mapping, see QEMU code xen_pt_realize->xc_physdev_map_pirq, and xl code pci_add_dm_done->xc_physdev_map_pirq, but in current codes, the gsi number is got from file /sys/bus/pci/devices//irq, that is wrong, because irq is not equal with gsi, they are in different spaces, so pirq mapping fails. And in current linux codes, there is no method to get gsi for userspace. For above purpose, record gsi of pcistub devices when init pcistub and add a new syscall into privcmd to let userspace can get gsi when they have a need. 
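From userspace the new ioctl is expected to be used roughly as follows. This is a hedged sketch: the /dev/xen/privcmd node and direct use of the installed uapi header (added in the diff below) are assumptions about the toolstack environment, not part of this patch:

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <xen/privcmd.h> /* installed copy of include/uapi/xen/privcmd.h */

    static int pcidev_get_gsi(uint32_t sbdf)
    {
        struct privcmd_pcidev_get_gsi arg = { .sbdf = sbdf };
        int fd = open("/dev/xen/privcmd", O_RDWR);
        int ret;

        if (fd < 0)
            return -1;
        ret = ioctl(fd, IOCTL_PRIVCMD_PCIDEV_GET_GSI, &arg);
        close(fd);
        return ret < 0 ? -1 : (int)arg.gsi;
    }
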
Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-4-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/uapi/xen/privcmd.h | 7 +++++++ include/xen/acpi.h | 9 +++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 8b8c5d1420fe..8e2c8fd44764 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -126,6 +126,11 @@ struct privcmd_ioeventfd { __u8 pad[2]; }; +struct privcmd_pcidev_get_gsi { + __u32 sbdf; + __u32 gsi; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -157,5 +162,7 @@ struct privcmd_ioeventfd { _IOW('P', 8, struct privcmd_irqfd) #define IOCTL_PRIVCMD_IOEVENTFD \ _IOW('P', 9, struct privcmd_ioeventfd) +#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \ + _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/acpi.h b/include/xen/acpi.h index 3bcfe82d9078..daa96a22d257 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -91,4 +91,13 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, } #endif +#ifdef CONFIG_XEN_PCI_STUB +int pcistub_get_gsi_from_sbdf(unsigned int sbdf); +#else +static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + return -1; +} +#endif + #endif /* _XEN_ACPI_H */ -- cgit v1.2.3 From 2efddb5575cd9f5f4d61ad417c92365a5f18d2f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Sep 2024 10:57:00 +0200 Subject: Revert "driver core: shut down devices asynchronously" This reverts commit 8064952c65045f05ee2671fe437770e50c151776. The series is being reverted before -rc1 as there are still reports of lockups on shutdown, so it's not quite ready for "prime time." Reported-by: Andrey Skvortsov Link: https://lore.kernel.org/r/ZvMkkhyJrohaajuk@skv.local Cc: Christoph Hellwig Cc: David Jeffery Cc: Keith Busch Cc: Laurence Oberman Cc: Nathan Chancellor Cc: Sagi Grimberg Cc: Stuart Hayes Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 14c9211b82d6..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -56,7 +56,6 @@ enum probe_type { * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @probe_type: Type of the probe (synchronous or asynchronous) to use. - * @async_shutdown_enable: Enables devices to be shutdown asynchronously. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -103,7 +102,6 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; - bool async_shutdown_enable; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; -- cgit v1.2.3 From ce4db753de21abfb7516ef64aff907813e8a8e3e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:25:52 +0800 Subject: kprobes: Remove obsoleted declaration for init_test_probes The init_test_probes() have been removed since commit e44e81c5b90f ("kprobes: convert tests to kunit"), and now it is useless, so remove it. 
Link: https://lore.kernel.org/all/20240826032552.4016314-1-cuigaosheng1@huawei.com/ Signed-off-by: Gaosheng Cui Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5fcbc254d186..8c4f3bb24429 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -269,15 +269,6 @@ extern unsigned long __stop_kprobe_blacklist[]; extern struct kretprobe_blackpoint kretprobe_blacklist[]; -#ifdef CONFIG_KPROBES_SANITY_TEST -extern int init_test_probes(void); -#else /* !CONFIG_KPROBES_SANITY_TEST */ -static inline int init_test_probes(void) -{ - return 0; -} -#endif /* CONFIG_KPROBES_SANITY_TEST */ - extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); -- cgit v1.2.3 From 08377ed24feef66866d0c6aabcae0aec515cf2ca Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:26 -0400 Subject: xen: sync elfnote.h from xen tree Sync Xen's elfnote.h header from xen.git to pull in the XEN_ELFNOTE_PHYS32_RELOC define. xen commit dfc9fab00378 ("x86/PVH: Support relocatable dom0 kernels") This is a copy except for the removal of the emacs editor config at the end of the file. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-2-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/elfnote.h | 93 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 38deb1214613..918f47d87d7a 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -11,7 +11,9 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,6 +24,8 @@ * * String values (for non-legacy) are NULL terminated ASCII, also known * as ASCIZ type. + * + * Xen only uses ELF Notes contained in x86 binaries. */ /* @@ -52,7 +56,7 @@ #define XEN_ELFNOTE_VIRT_BASE 3 /* - * The offset of the ELF paddr field from the acutal required + * The offset of the ELF paddr field from the actual required * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels @@ -92,7 +96,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -149,7 +158,9 @@ * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using - * large pages, despite such otherwise not being available to guests. + * large pages, despite such otherwise not being available to guests. 
Note + * that these large pages may be misaligned in PFN space (they'll obviously + * be aligned in MFN and virtual address spaces). * The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) @@ -185,9 +196,81 @@ */ #define XEN_ELFNOTE_PHYS32_ENTRY 18 +/* + * Physical loading constraints for PVH kernels + * + * The presence of this note indicates the kernel supports relocating itself. + * + * The note may include up to three 32bit values to place constraints on the + * guest physical loading addresses and alignment for a PVH kernel. Values + * are read in the following order: + * - a required start alignment (default 0x200000) + * - a minimum address for the start of the image (default 0; see below) + * - a maximum address for the last byte of the image (default 0xffffffff) + * + * When this note specifies an alignment value, it is used. Otherwise the + * maximum p_align value from loadable ELF Program Headers is used, if it is + * greater than or equal to 4k (0x1000). Otherwise, the default is used. + */ +#define XEN_ELFNOTE_PHYS32_RELOC 19 + /* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ -- cgit v1.2.3 From d5dbf8b48a4620db771f399ed7fce32d447f04a6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 19:42:58 +0900 Subject: tracepoint: Support iterating over tracepoints on modules Add for_each_module_tracepoint() for iterating over tracepoints on modules. 
This is similar to the for_each_kernel_tracepoint() but only for the tracepoints on modules (not including kernel built-in tracepoints). Link: https://lore.kernel.org/all/172397777800.286558.14554748203446214056.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- include/linux/tracepoint.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6be396bb4297..837fcf8ec0d5 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -64,6 +64,8 @@ struct tp_module { bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), + void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { @@ -79,6 +81,11 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } +static inline +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), + void *priv) +{ +} #endif /* CONFIG_MODULES */ /* -- cgit v1.2.3 From d4df54f338e43c790460674a3cc7db35b8395421 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 19:43:07 +0900 Subject: tracepoint: Support iterating tracepoints in a loading module Add for_each_tracepoint_in_module() function to iterate tracepoints in a module. This API is needed for handling tracepoints in a loading module from tracepoint_module_notifier callback function. This also update for_each_module_tracepoint() to pass the module to callback function so that it can find module easily. Link: https://lore.kernel.org/all/172397778740.286558.15781131277732977643.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- include/linux/tracepoint.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 837fcf8ec0d5..93a9f3070b48 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -64,8 +64,13 @@ struct tp_module { bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); -void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), void *priv); +void for_each_tracepoint_in_module(struct module *, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { @@ -82,10 +87,18 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) return 0; } static inline -void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *), +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), void *priv) { } +static inline +void for_each_tracepoint_in_module(struct module *mod, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv) +{ +} #endif /* CONFIG_MODULES */ /* -- cgit v1.2.3 From 04e906839a053f092ef53f4fb2d610983412b904 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 19 Sep 2024 14:33:42 +0200 Subject: usbnet: fix cyclical race on disconnect with work queue The work can submit URBs and the URBs can schedule the work. 
This cycle needs to be broken, when a device is to be stopped. Use a flag to do so. This is a design issue as old as the driver. Signed-off-by: Oliver Neukum Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Link: https://patch.msgid.link/20240919123525.688065-1-oneukum@suse.com Signed-off-by: Paolo Abeni --- include/linux/usb/usbnet.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9f08a584d707..0b9f1e598e3a 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,8 +76,23 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +/* This one is special, as it indicates that the device is going away + * there are cyclic dependencies between tasklet, timer and bh + * that must be broken + */ +# define EVENT_UNPLUG 31 }; +static inline bool usbnet_going_away(struct usbnet *ubn) +{ + return test_bit(EVENT_UNPLUG, &ubn->flags); +} + +static inline void usbnet_mark_going_away(struct usbnet *ubn) +{ + set_bit(EVENT_UNPLUG, &ubn->flags); +} + static inline struct usb_driver *driver_of(struct usb_interface *intf) { return to_usb_driver(intf->dev.driver); -- cgit v1.2.3 From 8af79d3edb5fd2dce35ea0a71595b6d4f9962350 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Sep 2024 15:13:39 +0200 Subject: netfilter: nfnetlink_queue: remove old clash resolution logic For historical reasons there are two clash resolution spots in netfilter, one in nfnetlink_queue and one in conntrack core. nfnetlink_queue one was added first: If a colliding entry is found, NAT NAT transformation is reversed by calling nat engine again with altered tuple. See commit 368982cd7d1b ("netfilter: nfnetlink_queue: resolve clash for unconfirmed conntracks") for details. One problem is that nf_reroute() won't take an action if the queueing doesn't occur in the OUTPUT hook, i.e. when queueing in forward or postrouting, packet will be sent via the wrong path. Another problem is that the scenario addressed (2nd UDP packet sent with identical addresses while first packet is still being processed) can also occur without any nfqueue involvement due to threaded resolvers doing A and AAAA requests back-to-back. This lead us to add clash resolution logic to the conntrack core, see commit 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries"). Instead of fixing the nfqueue based logic, lets remove it and let conntrack core handle this instead. Retain the ->update hook for sake of nfqueue based conntrack helpers. We could axe this hook completely but we'd have to split confirm and helper logic again, see commit ee04805ff54a ("netfilter: conntrack: make conntrack userspace helpers work again"). This SHOULD NOT be backported to kernels earlier than v5.6; they lack adequate clash resolution handling. Patch was originally written by Pablo Neira Ayuso. 
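Returning to the usbnet fix above: the new flag only helps if every path that can resubmit URBs checks it, as in this hedged illustration (the real usbnet_deferred_kevent() and usbnet_bh() are far larger; only the check itself is the point here):

    static void usbnet_kevent_sketch(struct work_struct *work)
    {
        struct usbnet *dev = container_of(work, struct usbnet, kevent);

        /* device is unplugging: break the work <-> URB resubmission cycle */
        if (usbnet_going_away(dev))
            return;

        /* ... otherwise process EVENT_* bits, possibly submitting new URBs ... */
    }
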
Reported-by: Antonio Ojea Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1766 Signed-off-by: Florian Westphal Tested-by: Antonio Ojea Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2683b2b77612..2b8aac2c70ad 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -376,15 +376,11 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, struct nf_conn; enum nf_nat_manip_type; struct nlattr; -enum ip_conntrack_dir; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); - unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, - enum nf_nat_manip_type mtype, - enum ip_conntrack_dir dir); void (*remove_nat_bysrc)(struct nf_conn *ct); }; -- cgit v1.2.3 From bfc4a245a794841cba5cf287034a0f60d3087402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Sep 2024 08:35:24 +0200 Subject: dma-mapping: fix DMA API tracing for chained scatterlists scatterlist allocations can be chained, and thus all iterations need to use the chain-aware iterators. Switch the newly added tracing to use the proper iterators so that they work with chained scatterlists. Fixes: 038eb433dc14 ("dma-mapping: add tracing for dma-mapping API calls") Reported-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Sean Anderson Tested-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com --- include/trace/events/dma.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index f57f05331d73..569f86a44aaa 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -176,9 +176,9 @@ TRACE_EVENT(dma_free, ); TRACE_EVENT(dma_map_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, ents, dir, attrs), + TP_ARGS(dev, sgl, nents, ents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -190,17 +190,17 @@ TRACE_EVENT(dma_map_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(phys_addrs))[i] = - sg_phys(sg + i); - for (i = 0; i < ents; i++) { + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + for_each_sg(sgl, sg, ents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; __entry->attrs = attrs; @@ -222,9 +222,9 @@ TRACE_EVENT(dma_map_sg, ); TRACE_EVENT(dma_unmap_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, dir, attrs), + TP_ARGS(dev, sgl, nents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -234,12 +234,12 @@ TRACE_EVENT(dma_unmap_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 
*)__get_dynamic_array(addrs))[i] = - sg_phys(sg + i); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg); __entry->dir = dir; __entry->attrs = attrs; ), @@ -290,9 +290,9 @@ DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, TP_ARGS(dev, dma_addr, size, dir)); DECLARE_EVENT_CLASS(dma_sync_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir), + TP_ARGS(dev, sgl, nents, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -302,14 +302,15 @@ DECLARE_EVENT_CLASS(dma_sync_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) { + for_each_sg(sgl, sg, nents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; ), -- cgit v1.2.3 From a78282e2c94f4ca80a2d7c56e4d1e9546be5596d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Sep 2024 11:39:02 -0700 Subject: Revert "binfmt_elf, coredump: Log the reason of the failed core dumps" This reverts commit fb97d2eb542faf19a8725afbd75cbc2518903210. The logging was questionable to begin with, but it seems to actively deadlock on the task lock. "On second thought, let's not log core dump failures. 'Tis a silly place" because if you can't tell your core dump is truncated, maybe you should just fix your debugger instead of adding bugs to the kernel. Reported-by: Vegard Nossum Link: https://lore.kernel.org/all/d122ece6-3606-49de-ae4d-8da88846bef2@oracle.com/ Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index edeb8532ce0f..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern int do_coredump(const kernel_siginfo_t *siginfo); +extern void do_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -62,11 +62,7 @@ extern int do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline int do_coredump(const kernel_siginfo_t *siginfo) -{ - /* Coredump support is not available, can't fail. */ - return 0; -} +static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #define coredump_report(...) #define coredump_report_failure(...) -- cgit v1.2.3 From 26a8ea80929c518bdec5e53a5776f95919b7c88e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 3 Sep 2024 07:25:19 -0700 Subject: mm/hugetlb: fix memfd_pin_folios resv_huge_pages leak memfd_pin_folios followed by unpin_folios leaves resv_huge_pages elevated if the pages were not already faulted in. 
During a normal page fault, resv_huge_pages is consumed here: hugetlb_fault() alloc_hugetlb_folio() dequeue_hugetlb_folio_vma() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- resv_huge_pages-- During memfd_pin_folios, the page is created by calling alloc_hugetlb_folio_nodemask instead of alloc_hugetlb_folio, and resv_huge_pages is not modified: memfd_alloc_folio() alloc_hugetlb_folio_nodemask() dequeue_hugetlb_folio_nodemask() dequeue_hugetlb_folio_node_exact() free_huge_pages-- alloc_hugetlb_folio_nodemask has other callers that must not modify resv_huge_pages. Therefore, to fix, define an alternate version of alloc_hugetlb_folio_nodemask for this call site that adjusts resv_huge_pages. Link: https://lkml.kernel.org/r/1725373521-451395-4-git-send-email-steven.sistare@oracle.com Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios") Signed-off-by: Steve Sistare Acked-by: Vivek Kasireddy Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 98c47c394b89..e4697539b665 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -1059,6 +1062,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return NULL; } +static inline struct folio * +alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) +{ + return NULL; +} + static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, -- cgit v1.2.3 From c5b1184decc819756ae549ba54c63b6790c4ddfd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Sep 2024 14:27:10 +0800 Subject: compiler.h: specify correct attribute for .rodata..c_jump_table Currently, there is an assembler message when generating kernel/bpf/core.o under CONFIG_OBJTOOL with LoongArch compiler toolchain: Warning: setting incorrect section attributes for .rodata..c_jump_table This is because the section ".rodata..c_jump_table" should be readonly, but there is a "W" (writable) part of the flags: $ readelf -S kernel/bpf/core.o | grep -A 1 "rodata..c" [34] .rodata..c_j[...] PROGBITS 0000000000000000 0000d2e0 0000000000000800 0000000000000000 WA 0 0 8 There is no above issue on x86 due to the generated section flag is only "A" (allocatable). In order to silence the warning on LoongArch, specify the attribute like ".rodata..c_jump_table,\"a\",@progbits #" explicitly, then the section attribute of ".rodata..c_jump_table" must be readonly in the kernel/bpf/core.o file. 
Before: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, DATA After: $ objdump -h kernel/bpf/core.o | grep -A 1 "rodata..c" 21 .rodata..c_jump_table 00000800 0000000000000000 0000000000000000 0000d2e0 2**3 CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA By the way, AFAICT, maybe the root cause is related with the different compiler behavior of various archs, so to some extent this change is a workaround for LoongArch, and also there is no effect for x86 which is the only port supported by objtool before LoongArch with this patch. Link: https://lkml.kernel.org/r/20240924062710.1243-1-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: [6.9+] Signed-off-by: Andrew Morton --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ec55bcce4146..4d4e23b6e3e7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define annotate_unreachable() __annotate_unreachable(__COUNTER__) /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") #else /* !CONFIG_OBJTOOL */ #define annotate_reachable() -- cgit v1.2.3 From cb787f4ac0c2e439ea8d7e6387b925f74576bdf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 Sep 2024 02:56:11 +0100 Subject: [tree-wide] finally take no_llseek out no_llseek had been defined to NULL two years ago, in commit 868941b14441 ("fs: remove no_llseek") To quote that commit, At -rc1 we'll need do a mechanical removal of no_llseek - git grep -l -w no_llseek | grep -v porting.rst | while read i; do sed -i '/\/d' $i done would do it. Unfortunately, that hadn't been done. Linus, could you do that now, so that we could finally put that thing to rest? All instances are of the form .llseek = no_llseek, so it's obviously safe. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/debugfs.h | 1 - include/linux/fs.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c9c65b132c0f..0928a6c8ae1e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -57,7 +57,6 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ - .llseek = no_llseek, \ } #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index eae5b67e4a15..e3c603d01337 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3234,7 +3234,6 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, -- cgit v1.2.3
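As a closing illustration of what the mechanical no_llseek removal means for drivers (hedged; example_read is a placeholder, not a real helper): seekable files keep an explicit .llseek such as generic_file_llseek or noop_llseek, while non-seekable files now simply leave the field unset, since a NULL ->llseek has meant "not seekable" ever since no_llseek was defined to NULL:

    static ssize_t example_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos);

    static const struct file_operations example_fops = {
        .owner = THIS_MODULE,
        .open  = simple_open,
        .read  = example_read,
        /* no .llseek member at all: lseek() on this file fails */
    };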